summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-04-25 13:28:37 +0000
committerLars Wirzenius <liw@liw.fi>2021-04-25 13:28:37 +0000
commitfef7f220e2385ccfb423201958021a33c26b69c9 (patch)
treeb8ff5b6438252a5c08ae00c796625a00ae453643
parent6ee2450e2c0478440c56f9a0f754edaaa901d957 (diff)
parent9c3893cf616279e4101b1ccf52d0a82b7a334b3b (diff)
downloadobnam2-fef7f220e2385ccfb423201958021a33c26b69c9.tar.gz
Merge branch 'feature/78-cachedir-tag' into 'main'
feat: add support for CACHEDIR.TAG Closes #78 See merge request larswirzenius/obnam!138
-rwxr-xr-xcheck2
-rw-r--r--obnam.md60
-rw-r--r--src/cmd/backup.rs4
-rw-r--r--src/config.rs4
-rw-r--r--src/fsiter.rs98
-rw-r--r--subplot/data.py7
-rw-r--r--subplot/data.yaml3
7 files changed, 164 insertions, 14 deletions
diff --git a/check b/check
index 3156058..bcb04e0 100755
--- a/check
+++ b/check
@@ -20,7 +20,7 @@ got_cargo_cmd()
cargo "$1" --help > /dev/null
}
-got_cargo_cmd clippy && cargo clippy -q --all-targets
+got_cargo_cmd clippy && $hideok cargo clippy --all-targets
$hideok cargo build --all-targets
got_cargo_cmd fmt && $hideok cargo fmt -- --check
$hideok cargo test
diff --git a/obnam.md b/obnam.md
index e5a5447..e788d96 100644
--- a/obnam.md
+++ b/obnam.md
@@ -1519,6 +1519,66 @@ roots:
- live/two
~~~
+## CACHEDIR.TAG support
+
+### By default, skip directories containing CACHEDIR.TAG
+
+This scenario verifies that the Obnam client skips the contents of directories
+that contain [CACHEDIR.TAG][], but backs up the tag itself.
+
+[CACHEDIR.TAG]: https://bford.info/cachedir/
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on client.yaml
+and a file live/ignored/data.dat containing some random data
+and a cache directory tag in live/ignored
+and a file live/not_ignored/data.dat containing some random data
+and a manifest of the directory live/not_ignored in initial.yaml
+when I run obnam backup
+then backup generation is GEN
+when I invoke obnam restore <GEN> rest
+given a manifest of the directory live/not_ignored restored in rest in restored.yaml
+then manifests initial.yaml and restored.yaml match
+then file rest/live/ignored/CACHEDIR.TAG contains "Signature: 8a477f597d28d172789f06886806bc55"
+then file rest/live/ignored/data.dat does not exist
+~~~
+
+~~~{#client.yaml .file .yaml .numberLines}
+roots:
+- live
+~~~
+
+### Can ignore CACHEDIR.TAGs if told to do so
+
+This scenario verifies that when the `exclude_cache_tag_directories` setting is
+disabled, the Obnam client backs up directories even if they
+contain [CACHEDIR.TAG][].
+
+[CACHEDIR.TAG]: https://bford.info/cachedir/
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on client_includes_cachedirs.yaml
+and a file live/ignored/data.dat containing some random data
+and a cache directory tag in live/ignored
+and a file live/not_ignored/data.dat containing some random data
+and a manifest of the directory live in initial.yaml
+when I run obnam backup
+then backup generation is GEN
+when I invoke obnam restore <GEN> rest
+given a manifest of the directory live restored in rest in restored.yaml
+then manifests initial.yaml and restored.yaml match
+~~~
+
+~~~{#client_includes_cachedirs.yaml .file .yaml .numberLines}
+roots:
+- live
+exclude_cache_tag_directories: false
+~~~
+
# Acceptance criteria for backup encryption
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index a0e0599..0479844 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -65,7 +65,7 @@ fn initial_backup(
let count = {
let mut new = NascentGeneration::create(newtemp.path())?;
for root in &config.roots {
- let iter = FsIterator::new(root);
+ let iter = FsIterator::new(root, config.exclude_cache_tag_directories);
let warnings = new.insert_iter(iter.map(|entry| run.backup(entry)))?;
for w in warnings {
all_warnings.push(w);
@@ -95,7 +95,7 @@ fn incremental_backup(
run.start_backup(&old)?;
let mut new = NascentGeneration::create(newtemp.path())?;
for root in &config.roots {
- let iter = FsIterator::new(root);
+ let iter = FsIterator::new(root, config.exclude_cache_tag_directories);
let warnings = new.insert_iter(iter.map(|entry| run.backup(entry, &old)))?;
for w in warnings {
all_warnings.push(w);
diff --git a/src/config.rs b/src/config.rs
index d6ffbc5..6881959 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -17,6 +17,7 @@ struct TentativeClientConfig {
roots: Vec<PathBuf>,
log: Option<PathBuf>,
encrypt: Option<bool>,
+ exclude_cache_tag_directories: Option<bool>,
}
#[derive(Debug, Serialize, Clone)]
@@ -59,6 +60,7 @@ pub struct ClientConfigWithoutPasswords {
pub roots: Vec<PathBuf>,
pub log: PathBuf,
pub encrypt: bool,
+ pub exclude_cache_tag_directories: bool,
}
#[derive(Debug, thiserror::Error)]
@@ -91,6 +93,7 @@ impl ClientConfigWithoutPasswords {
let tentative: TentativeClientConfig = serde_yaml::from_str(&config)?;
let encrypt = tentative.encrypt.or(Some(false)).unwrap();
+ let exclude_cache_tag_directories = tentative.exclude_cache_tag_directories.unwrap_or(true);
let config = Self {
filename: filename.to_path_buf(),
@@ -103,6 +106,7 @@ impl ClientConfigWithoutPasswords {
.or_else(|| Some(PathBuf::from(DEVNULL)))
.unwrap(),
encrypt,
+ exclude_cache_tag_directories,
};
config.check()?;
diff --git a/src/fsiter.rs b/src/fsiter.rs
index b778cf3..6c18404 100644
--- a/src/fsiter.rs
+++ b/src/fsiter.rs
@@ -5,7 +5,7 @@ use walkdir::{DirEntry, IntoIter, WalkDir};
/// Iterator over file system entries in a directory tree.
pub struct FsIterator {
- iter: IntoIter,
+ iter: SkipCachedirs,
}
#[derive(Debug, thiserror::Error)]
@@ -23,9 +23,12 @@ pub enum FsIterError {
pub type FsIterResult<T> = Result<T, FsIterError>;
impl FsIterator {
- pub fn new(root: &Path) -> Self {
+ pub fn new(root: &Path, exclude_cache_tag_directories: bool) -> Self {
Self {
- iter: WalkDir::new(root).into_iter(),
+ iter: SkipCachedirs::new(
+ WalkDir::new(root).into_iter(),
+ exclude_cache_tag_directories,
+ ),
}
}
}
@@ -33,18 +36,91 @@ impl FsIterator {
impl Iterator for FsIterator {
type Item = FsIterResult<FilesystemEntry>;
fn next(&mut self) -> Option<Self::Item> {
- let next = self.iter.next();
- debug!("walkdir found: {:?}", next);
- match next {
- None => None,
- Some(Ok(entry)) => Some(new_entry(&entry)),
- Some(Err(err)) => Some(Err(err.into())),
+ self.iter.next()
+ }
+}
+
+/// Cachedir-aware adaptor for WalkDir: it skips the contents of dirs that contain CACHEDIR.TAG,
+/// but still yields entries for the dir and the tag themselves.
+struct SkipCachedirs {
+ iter: IntoIter,
+ exclude_cache_tag_directories: bool,
+ // This is the last tag we've found. `next()` will yield it before asking `iter` for more
+ // entries.
+ cachedir_tag: Option<FsIterResult<FilesystemEntry>>,
+}
+
+impl SkipCachedirs {
+ fn new(iter: IntoIter, exclude_cache_tag_directories: bool) -> Self {
+ Self {
+ iter,
+ exclude_cache_tag_directories,
+ cachedir_tag: None,
+ }
+ }
+
+ fn try_enqueue_cachedir_tag(&mut self, entry: &DirEntry) {
+ if !self.exclude_cache_tag_directories {
+ return;
}
+
+ // If this entry is not a directory, it means we already processed its
+ // parent dir and decided that the parent is not a cache directory.
+ if !entry.file_type().is_dir() {
+ return;
+ }
+
+ let mut tag_path = entry.path().to_owned();
+ tag_path.push("CACHEDIR.TAG");
+
+ // Tags are required to be regular files -- not even symlinks are allowed.
+ if !tag_path.is_file() {
+ return;
+ };
+
+ const CACHEDIR_TAG: &[u8] = b"Signature: 8a477f597d28d172789f06886806bc55";
+ let mut content = [0u8; CACHEDIR_TAG.len()];
+
+ let mut file = if let Ok(file) = std::fs::File::open(&tag_path) {
+ file
+ } else {
+ return;
+ };
+
+ use std::io::Read;
+ match file.read_exact(&mut content) {
+ Ok(_) => (),
+ // If we can't read the tag file, proceed as if it's not there
+ Err(_) => return,
+ }
+
+ if content == CACHEDIR_TAG {
+ self.iter.skip_current_dir();
+ self.cachedir_tag = Some(new_entry(&tag_path));
+ }
+ }
+}
+
+impl Iterator for SkipCachedirs {
+ type Item = FsIterResult<FilesystemEntry>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.cachedir_tag.take().or_else(|| {
+ let next = self.iter.next();
+ debug!("walkdir found: {:?}", next);
+ match next {
+ None => None,
+ Some(Err(err)) => Some(Err(err.into())),
+ Some(Ok(entry)) => {
+ self.try_enqueue_cachedir_tag(&entry);
+ Some(new_entry(entry.path()))
+ }
+ }
+ })
}
}
-fn new_entry(e: &DirEntry) -> FsIterResult<FilesystemEntry> {
- let path = e.path();
+fn new_entry(path: &Path) -> FsIterResult<FilesystemEntry> {
let meta = std::fs::symlink_metadata(path);
debug!("metadata for {:?}: {:?}", path, meta);
let meta = match meta {
diff --git a/subplot/data.py b/subplot/data.py
index 3833f2e..d134e5f 100644
--- a/subplot/data.py
+++ b/subplot/data.py
@@ -29,6 +29,13 @@ def create_fifo(ctx, filename=None):
os.mkfifo(filename)
+def create_cachedir_tag_in(ctx, dirpath=None):
+ filepath = f"{dirpath}/CACHEDIR.TAG"
+ logging.debug(f"creating {filepath}")
+ os.makedirs(dirpath, exist_ok=True)
+ open(filepath, "w").write("Signature: 8a477f597d28d172789f06886806bc55")
+
+
def create_nonutf8_filename(ctx, dirname=None):
filename = "\x88"
os.mkdir(dirname)
diff --git a/subplot/data.yaml b/subplot/data.yaml
index 699c5b1..dcc6807 100644
--- a/subplot/data.yaml
+++ b/subplot/data.yaml
@@ -10,6 +10,9 @@
- given: "a named pipe {filename}"
function: create_fifo
+- given: a cache directory tag in {dirpath}
+ function: create_cachedir_tag_in
+
- given: "a file in {dirname} with a non-UTF8 filename"
function: create_nonutf8_filename