diff options
author | Lars Wirzenius <liw@liw.fi> | 2021-04-25 13:28:37 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2021-04-25 13:28:37 +0000 |
commit | fef7f220e2385ccfb423201958021a33c26b69c9 (patch) | |
tree | b8ff5b6438252a5c08ae00c796625a00ae453643 | |
parent | 6ee2450e2c0478440c56f9a0f754edaaa901d957 (diff) | |
parent | 9c3893cf616279e4101b1ccf52d0a82b7a334b3b (diff) | |
download | obnam2-fef7f220e2385ccfb423201958021a33c26b69c9.tar.gz |
Merge branch 'feature/78-cachedir-tag' into 'main'
feat: add support for CACHEDIR.TAG
Closes #78
See merge request larswirzenius/obnam!138
-rwxr-xr-x | check | 2 | ||||
-rw-r--r-- | obnam.md | 60 | ||||
-rw-r--r-- | src/cmd/backup.rs | 4 | ||||
-rw-r--r-- | src/config.rs | 4 | ||||
-rw-r--r-- | src/fsiter.rs | 98 | ||||
-rw-r--r-- | subplot/data.py | 7 | ||||
-rw-r--r-- | subplot/data.yaml | 3 |
7 files changed, 164 insertions, 14 deletions
@@ -20,7 +20,7 @@ got_cargo_cmd() cargo "$1" --help > /dev/null } -got_cargo_cmd clippy && cargo clippy -q --all-targets +got_cargo_cmd clippy && $hideok cargo clippy --all-targets $hideok cargo build --all-targets got_cargo_cmd fmt && $hideok cargo fmt -- --check $hideok cargo test @@ -1519,6 +1519,66 @@ roots: - live/two ~~~ +## CACHEDIR.TAG support + +### By default, skip directories containing CACHEDIR.TAG + +This scenario verifies that Obnam client skips the contents of directories that +contain [CACHEDIR.TAG][], but backs up the tag itself. + +[CACHEDIR.TAG]: https://bford.info/cachedir/ + +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on client.yaml +and a file live/ignored/data.dat containing some random data +and a cache directory tag in live/ignored +and a file live/not_ignored/data.dat containing some random data +and a manifest of the directory live/not_ignored in initial.yaml +when I run obnam backup +then backup generation is GEN +when I invoke obnam restore <GEN> rest +given a manifest of the directory live/not_ignored restored in rest in restored.yaml +then manifests initial.yaml and restored.yaml match +then file rest/live/ignored/CACHEDIR.TAG contains "Signature: 8a477f597d28d172789f06886806bc55" +then file rest/live/ignored/data.dat does not exist +~~~ + +~~~{#client.yaml .file .yaml .numberLines} +roots: +- live +~~~ + +### Can ignore CACHEDIR.TAGs if told to do so + +This scenario verifies that when `exclude_cache_tag_directories` setting is +disabled, Obnam client backs up directories even if they +contain [CACHEDIR.TAG][]. + +[CACHEDIR.TAG]: https://bford.info/cachedir/ + +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on client_includes_cachedirs.yaml +and a file live/ignored/data.dat containing some random data +and a cache directory tag in live/ignored +and a file live/not_ignored/data.dat containing some random data +and a manifest of the directory live in initial.yaml +when I run obnam backup +then backup generation is GEN +when I invoke obnam restore <GEN> rest +given a manifest of the directory live restored in rest in restored.yaml +then manifests initial.yaml and restored.yaml match +~~~ + +~~~{#client_includes_cachedirs.yaml .file .yaml .numberLines} +roots: +- live +exclude_cache_tag_directories: false +~~~ + # Acceptance criteria for backup encryption diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index a0e0599..0479844 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -65,7 +65,7 @@ fn initial_backup( let count = { let mut new = NascentGeneration::create(newtemp.path())?; for root in &config.roots { - let iter = FsIterator::new(root); + let iter = FsIterator::new(root, config.exclude_cache_tag_directories); let warnings = new.insert_iter(iter.map(|entry| run.backup(entry)))?; for w in warnings { all_warnings.push(w); @@ -95,7 +95,7 @@ fn incremental_backup( run.start_backup(&old)?; let mut new = NascentGeneration::create(newtemp.path())?; for root in &config.roots { - let iter = FsIterator::new(root); + let iter = FsIterator::new(root, config.exclude_cache_tag_directories); let warnings = new.insert_iter(iter.map(|entry| run.backup(entry, &old)))?; for w in warnings { all_warnings.push(w); diff --git a/src/config.rs b/src/config.rs index d6ffbc5..6881959 100644 --- a/src/config.rs +++ b/src/config.rs @@ -17,6 +17,7 @@ struct TentativeClientConfig { roots: Vec<PathBuf>, log: Option<PathBuf>, encrypt: Option<bool>, + exclude_cache_tag_directories: Option<bool>, } #[derive(Debug, Serialize, Clone)] @@ -59,6 +60,7 @@ pub struct ClientConfigWithoutPasswords { pub roots: Vec<PathBuf>, pub log: PathBuf, pub encrypt: bool, + pub exclude_cache_tag_directories: bool, } #[derive(Debug, thiserror::Error)] @@ -91,6 +93,7 @@ impl ClientConfigWithoutPasswords { let tentative: TentativeClientConfig = serde_yaml::from_str(&config)?; let encrypt = tentative.encrypt.or(Some(false)).unwrap(); + let exclude_cache_tag_directories = tentative.exclude_cache_tag_directories.unwrap_or(true); let config = Self { filename: filename.to_path_buf(), @@ -103,6 +106,7 @@ impl ClientConfigWithoutPasswords { .or_else(|| Some(PathBuf::from(DEVNULL))) .unwrap(), encrypt, + exclude_cache_tag_directories, }; config.check()?; diff --git a/src/fsiter.rs b/src/fsiter.rs index b778cf3..6c18404 100644 --- a/src/fsiter.rs +++ b/src/fsiter.rs @@ -5,7 +5,7 @@ use walkdir::{DirEntry, IntoIter, WalkDir}; /// Iterator over file system entries in a directory tree. pub struct FsIterator { - iter: IntoIter, + iter: SkipCachedirs, } #[derive(Debug, thiserror::Error)] @@ -23,9 +23,12 @@ pub enum FsIterError { pub type FsIterResult<T> = Result<T, FsIterError>; impl FsIterator { - pub fn new(root: &Path) -> Self { + pub fn new(root: &Path, exclude_cache_tag_directories: bool) -> Self { Self { - iter: WalkDir::new(root).into_iter(), + iter: SkipCachedirs::new( + WalkDir::new(root).into_iter(), + exclude_cache_tag_directories, + ), } } } @@ -33,18 +36,91 @@ impl FsIterator { impl Iterator for FsIterator { type Item = FsIterResult<FilesystemEntry>; fn next(&mut self) -> Option<Self::Item> { - let next = self.iter.next(); - debug!("walkdir found: {:?}", next); - match next { - None => None, - Some(Ok(entry)) => Some(new_entry(&entry)), - Some(Err(err)) => Some(Err(err.into())), + self.iter.next() + } +} + +/// Cachedir-aware adaptor for WalkDir: it skips the contents of dirs that contain CACHEDIR.TAG, +/// but still yields entries for the dir and the tag themselves. +struct SkipCachedirs { + iter: IntoIter, + exclude_cache_tag_directories: bool, + // This is the last tag we've found. `next()` will yield it before asking `iter` for more + // entries. + cachedir_tag: Option<FsIterResult<FilesystemEntry>>, +} + +impl SkipCachedirs { + fn new(iter: IntoIter, exclude_cache_tag_directories: bool) -> Self { + Self { + iter, + exclude_cache_tag_directories, + cachedir_tag: None, + } + } + + fn try_enqueue_cachedir_tag(&mut self, entry: &DirEntry) { + if !self.exclude_cache_tag_directories { + return; } + + // If this entry is not a directory, it means we already processed its + // parent dir and decided that it's not cached. + if !entry.file_type().is_dir() { + return; + } + + let mut tag_path = entry.path().to_owned(); + tag_path.push("CACHEDIR.TAG"); + + // Tags are required to be regular files -- not even symlinks are allowed. + if !tag_path.is_file() { + return; + }; + + const CACHEDIR_TAG: &[u8] = b"Signature: 8a477f597d28d172789f06886806bc55"; + let mut content = [0u8; CACHEDIR_TAG.len()]; + + let mut file = if let Ok(file) = std::fs::File::open(&tag_path) { + file + } else { + return; + }; + + use std::io::Read; + match file.read_exact(&mut content) { + Ok(_) => (), + // If we can't read the tag file, proceed as if's not there + Err(_) => return, + } + + if content == CACHEDIR_TAG { + self.iter.skip_current_dir(); + self.cachedir_tag = Some(new_entry(&tag_path)); + } + } +} + +impl Iterator for SkipCachedirs { + type Item = FsIterResult<FilesystemEntry>; + + fn next(&mut self) -> Option<Self::Item> { + self.cachedir_tag.take().or_else(|| { + let next = self.iter.next(); + debug!("walkdir found: {:?}", next); + match next { + None => None, + Some(Err(err)) => Some(Err(err.into())), + Some(Ok(entry)) => { + self.try_enqueue_cachedir_tag(&entry); + Some(new_entry(entry.path())) + } + } + }) } } -fn new_entry(e: &DirEntry) -> FsIterResult<FilesystemEntry> { - let path = e.path(); +fn new_entry(path: &Path) -> FsIterResult<FilesystemEntry> { let meta = std::fs::symlink_metadata(path); debug!("metadata for {:?}: {:?}", path, meta); let meta = match meta { diff --git a/subplot/data.py b/subplot/data.py index 3833f2e..d134e5f 100644 --- a/subplot/data.py +++ b/subplot/data.py @@ -29,6 +29,13 @@ def create_fifo(ctx, filename=None): os.mkfifo(filename) +def create_cachedir_tag_in(ctx, dirpath=None): + filepath = f"{dirpath}/CACHEDIR.TAG" + logging.debug(f"creating {filepath}") + os.makedirs(dirpath, exist_ok=True) + open(filepath, "w").write("Signature: 8a477f597d28d172789f06886806bc55") + + def create_nonutf8_filename(ctx, dirname=None): filename = "\x88" os.mkdir(dirname) diff --git a/subplot/data.yaml b/subplot/data.yaml index 699c5b1..dcc6807 100644 --- a/subplot/data.yaml +++ b/subplot/data.yaml @@ -10,6 +10,9 @@ - given: "a named pipe {filename}" function: create_fifo +- given: a cache directory tag in {dirpath} + function: create_cachedir_tag_in + - given: "a file in {dirname} with a non-UTF8 filename" function: create_nonutf8_filename |