summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-07-30 09:36:44 +0000
committerLars Wirzenius <liw@liw.fi>2021-07-30 09:36:44 +0000
commitcc3524fd60169a66c8a4e2178448bf5f6d07c99b (patch)
treeae7b9c5eaec01aa52e6e0c2aa5b61e9cc66ec2e9
parent7f4087758e401791794f0518233ea058355438d3 (diff)
parentde85cf701613d5e59c87431a6ce10b80caacef40 (diff)
downloadobnam2-cc3524fd60169a66c8a4e2178448bf5f6d07c99b.tar.gz
Merge branch 'feature/112-cachedir-protection' into 'main'
Backup: exit non-zero if new CACHEDIR.TAGs are found (unless `exclude_cache_tag_directories` is disabled) Closes #112 See merge request obnam/obnam!168
-rw-r--r--obnam.md55
-rw-r--r--src/backup_run.rs66
-rw-r--r--src/cmd/backup.rs29
-rw-r--r--src/error.rs5
-rw-r--r--src/fsiter.rs25
-rw-r--r--src/generation.rs137
6 files changed, 259 insertions, 58 deletions
diff --git a/obnam.md b/obnam.md
index 31b87dd..decb28c 100644
--- a/obnam.md
+++ b/obnam.md
@@ -141,9 +141,21 @@ itself says in the "Security Considerations" section:
> than the outright deletion of that directory, for example, causing the
> contents of that directory to be omitted from regular backups.
-For now, the only mitigation is a setting called
-`exclude_cache_tag_directories`, which users can disable if they want to avoid
-this threat.
+This is mitigated in two ways:
+
+1. if an incremental backup finds a tag which wasn't in the previous backup,
+ Obnam will show the path to the tag, and exit with a non-zero exit code. That
+ way, the user has a chance to notice the new tag. The backup itself is still
+ made, so if the tag is legitimate, the user doesn't need to re-run Obnam.
+
+ Error messages and non-zero exit are jarring, so this approach is not
+ user-friendly. Better than nothing though;
+
+2. users can set `exclude_cache_tag_directories` to `false`, which will make
+ Obnam ignore the tags, nullifying the threat.
+
+ This is a last-ditch solution, since it makes the backups larger and slower
+ (because Obnam has to back up more data).
[CACHEDIR.TAG]: https://bford.info/cachedir/
@@ -1635,11 +1647,40 @@ roots:
- live
~~~
-### Can ignore CACHEDIR.TAGs if told to do so
+### Incremental backup errors if it finds new CACHEDIR.TAGs
+
+To mitigate the risk described in the "Threat Model" chapter, Obnam should
+notify the user when it finds CACHEDIR.TAG files that aren't present in the
+previous backup. Notification is twofold: the path to the tag should be shown,
+and the client should exit with a non-zero code. This scenario backs up a
+directory (which shouldn't error), then adds a new tag and backs up the
+directory again, expecting an error.
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on client.yaml
+and a file live/data1.dat containing some random data
+and a file live/data2.dat containing some random data
+when I run obnam backup
+then exit code is 0
+given a cache directory tag in live/
+when I try to run obnam backup
+then exit code is 1
+and stdout contains "live/CACHEDIR.TAG"
+when I run obnam list-files
+then exit code is 0
+then file live/CACHEDIR.TAG was backed up because it was new
+and stdout doesn't contain "live/data1.dat"
+and stdout doesn't contain "live/data2.dat"
+~~~
+
+### Ignore CACHEDIR.TAGs if `exclude_cache_tag_directories` is disabled
This scenario verifies that when `exclude_cache_tag_directories` setting is
disabled, Obnam client backs up directories even if they
-contain [CACHEDIR.TAG][].
+contain [CACHEDIR.TAG][]. It also verifies that incremental backups don't fail when
+new tags are added, i.e. the aforementioned mitigation is disabled too.
[CACHEDIR.TAG]: https://bford.info/cachedir/
@@ -1656,6 +1697,10 @@ then backup generation is GEN
when I invoke obnam restore <GEN> rest
given a manifest of the directory live restored in rest in restored.yaml
then manifests initial.yaml and restored.yaml match
+given a cache directory tag in live/not_ignored
+when I run obnam backup
+then exit code is 0
+and stdout doesn't contain "live/not_ignored/CACHEDIR.TAG"
~~~
~~~{#client_includes_cachedirs.yaml .file .yaml .numberLines}
diff --git a/src/backup_run.rs b/src/backup_run.rs
index 3c03ec3..738830d 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -5,11 +5,11 @@ use crate::client::{BackupClient, ClientError};
use crate::config::ClientConfig;
use crate::error::ObnamError;
use crate::fsentry::FilesystemEntry;
-use crate::fsiter::{FsIterError, FsIterator};
+use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator};
use crate::generation::{LocalGeneration, LocalGenerationError, NascentError, NascentGeneration};
use crate::policy::BackupPolicy;
use log::{info, warn};
-use std::path::Path;
+use std::path::{Path, PathBuf};
pub struct BackupRun<'a> {
client: &'a BackupClient,
@@ -35,6 +35,17 @@ pub struct FsEntryBackupOutcome {
pub entry: FilesystemEntry,
pub ids: Vec<ChunkId>,
pub reason: Reason,
+ pub is_cachedir_tag: bool,
+}
+
+#[derive(Debug)]
+pub struct RootsBackupOutcome {
+ /// The number of backed up files.
+ pub files_count: i64,
+ /// The errors encountered while backing up files.
+ pub warnings: Vec<BackupError>,
+    /// CACHEDIR.TAG files that aren't present in a previous generation.
+ pub new_cachedir_tags: Vec<PathBuf>,
}
impl<'a> BackupRun<'a> {
@@ -101,24 +112,38 @@ impl<'a> BackupRun<'a> {
config: &ClientConfig,
old: &LocalGeneration,
newpath: &Path,
- ) -> Result<(i64, Vec<BackupError>), NascentError> {
- let mut all_warnings = vec![];
- let count = {
+ ) -> Result<RootsBackupOutcome, NascentError> {
+ let mut warnings = vec![];
+ let mut new_cachedir_tags = vec![];
+ let files_count = {
let mut new = NascentGeneration::create(newpath)?;
for root in &config.roots {
let iter = FsIterator::new(root, config.exclude_cache_tag_directories);
- let mut warnings = new.insert_iter(iter.map(|entry| self.backup(entry, &old)))?;
- all_warnings.append(&mut warnings);
+ let entries = iter.map(|entry| {
+ if let Ok(ref entry) = entry {
+ let path = entry.inner.pathbuf();
+ if entry.is_cachedir_tag && !old.is_cachedir_tag(&path)? {
+ new_cachedir_tags.push(path);
+ }
+ };
+ self.backup(entry, &old)
+ });
+ let mut new_warnings = new.insert_iter(entries)?;
+ warnings.append(&mut new_warnings);
}
new.file_count()
};
self.finish();
- Ok((count, all_warnings))
+ Ok(RootsBackupOutcome {
+ files_count,
+ warnings,
+ new_cachedir_tags,
+ })
}
pub fn backup(
&self,
- entry: Result<FilesystemEntry, FsIterError>,
+ entry: Result<AnnotatedFsEntry, FsIterError>,
old: &LocalGeneration,
) -> Result<FsEntryBackupOutcome, BackupError> {
match entry {
@@ -128,10 +153,10 @@ impl<'a> BackupRun<'a> {
Err(BackupError::FsIterError(err))
}
Ok(entry) => {
- let path = &entry.pathbuf();
+ let path = &entry.inner.pathbuf();
info!("backup: {}", path.display());
self.found_live_file(path);
- let reason = self.policy.needs_backup(&old, &entry);
+ let reason = self.policy.needs_backup(&old, &entry.inner);
match reason {
Reason::IsNew
| Reason::Changed
@@ -144,7 +169,7 @@ impl<'a> BackupRun<'a> {
reason,
)),
Reason::Unchanged | Reason::Skipped | Reason::FileError => {
- let fileno = old.get_fileno(&entry.pathbuf())?;
+ let fileno = old.get_fileno(&entry.inner.pathbuf())?;
let ids = if let Some(fileno) = fileno {
let mut ids = vec![];
for id in old.chunkids(fileno)?.iter()? {
@@ -154,7 +179,12 @@ impl<'a> BackupRun<'a> {
} else {
vec![]
};
- Ok(FsEntryBackupOutcome { entry, ids, reason })
+ Ok(FsEntryBackupOutcome {
+ entry: entry.inner,
+ ids,
+ reason,
+ is_cachedir_tag: entry.is_cachedir_tag,
+ })
}
}
}
@@ -172,25 +202,27 @@ impl<'a> BackupRun<'a> {
fn backup_file(
client: &BackupClient,
- entry: &FilesystemEntry,
+ entry: &AnnotatedFsEntry,
path: &Path,
chunk_size: usize,
reason: Reason,
) -> FsEntryBackupOutcome {
- let ids = client.upload_filesystem_entry(&entry, chunk_size);
+ let ids = client.upload_filesystem_entry(&entry.inner, chunk_size);
match ids {
Err(err) => {
warn!("error backing up {}, skipping it: {}", path.display(), err);
FsEntryBackupOutcome {
- entry: entry.clone(),
+ entry: entry.inner.clone(),
ids: vec![],
reason: Reason::FileError,
+ is_cachedir_tag: entry.is_cachedir_tag,
}
}
Ok(ids) => FsEntryBackupOutcome {
- entry: entry.clone(),
+ entry: entry.inner.clone(),
ids,
reason,
+ is_cachedir_tag: entry.is_cachedir_tag,
},
}
}
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index d574b96..04dfb05 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -26,30 +26,47 @@ impl Backup {
let oldtemp = NamedTempFile::new()?;
let newtemp = NamedTempFile::new()?;
- let (count, warnings) = match genlist.resolve("latest") {
+ let (is_incremental, outcome) = match genlist.resolve("latest") {
Err(_) => {
info!("fresh backup without a previous generation");
let mut run = BackupRun::initial(config, &client)?;
let old = run.start(None, oldtemp.path())?;
- run.backup_roots(config, &old, newtemp.path())?
+ (false, run.backup_roots(config, &old, newtemp.path())?)
}
Ok(old_id) => {
info!("incremental backup based on {}", old_id);
let mut run = BackupRun::incremental(config, &client)?;
let old = run.start(Some(&old_id), oldtemp.path())?;
- run.backup_roots(config, &old, newtemp.path())?
+ (true, run.backup_roots(config, &old, newtemp.path())?)
}
};
let gen_id = upload_nascent_generation(&client, newtemp.path())?;
- for w in warnings.iter() {
+ for w in outcome.warnings.iter() {
println!("warning: {}", w);
}
- report_stats(&runtime, count, &gen_id, warnings.len())?;
+ if is_incremental && !outcome.new_cachedir_tags.is_empty() {
+ println!("New CACHEDIR.TAG files since the last backup:");
+ for t in &outcome.new_cachedir_tags {
+ println!("- {:?}", t);
+ }
+ println!("You can configure Obnam to ignore all such files by setting `exclude_cache_tag_directories` to `false`.");
+ }
- Ok(())
+ report_stats(
+ &runtime,
+ outcome.files_count,
+ &gen_id,
+ outcome.warnings.len(),
+ )?;
+
+ if is_incremental && !outcome.new_cachedir_tags.is_empty() {
+ Err(ObnamError::NewCachedirTagsFound)
+ } else {
+ Ok(())
+ }
}
}
diff --git a/src/error.rs b/src/error.rs
index e4d77d3..30571ec 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -52,4 +52,9 @@ pub enum ObnamError {
#[error(transparent)]
SerdeJsonError(#[from] serde_json::Error),
+
+ #[error(
+ "found CACHEDIR.TAG files that aren't present in the previous backup, might be an attack"
+ )]
+ NewCachedirTagsFound,
}
diff --git a/src/fsiter.rs b/src/fsiter.rs
index aea9078..2325793 100644
--- a/src/fsiter.rs
+++ b/src/fsiter.rs
@@ -3,6 +3,13 @@ use log::{debug, warn};
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, IntoIter, WalkDir};
+/// Filesystem entry along with additional info about it.
+pub struct AnnotatedFsEntry {
+ pub inner: FilesystemEntry,
+ /// Is `entry` a valid CACHEDIR.TAG?
+ pub is_cachedir_tag: bool,
+}
+
/// Iterator over file system entries in a directory tree.
pub struct FsIterator {
iter: SkipCachedirs,
@@ -32,7 +39,7 @@ impl FsIterator {
}
impl Iterator for FsIterator {
- type Item = Result<FilesystemEntry, FsIterError>;
+ type Item = Result<AnnotatedFsEntry, FsIterError>;
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
@@ -45,7 +52,7 @@ struct SkipCachedirs {
exclude_cache_tag_directories: bool,
// This is the last tag we've found. `next()` will yield it before asking `iter` for more
// entries.
- cachedir_tag: Option<Result<FilesystemEntry, FsIterError>>,
+ cachedir_tag: Option<Result<AnnotatedFsEntry, FsIterError>>,
}
impl SkipCachedirs {
@@ -94,13 +101,13 @@ impl SkipCachedirs {
if content == CACHEDIR_TAG {
self.iter.skip_current_dir();
- self.cachedir_tag = Some(new_entry(&tag_path));
+ self.cachedir_tag = Some(new_entry(&tag_path, true));
}
}
}
impl Iterator for SkipCachedirs {
- type Item = Result<FilesystemEntry, FsIterError>;
+ type Item = Result<AnnotatedFsEntry, FsIterError>;
fn next(&mut self) -> Option<Self::Item> {
self.cachedir_tag.take().or_else(|| {
@@ -111,14 +118,14 @@ impl Iterator for SkipCachedirs {
Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))),
Some(Ok(entry)) => {
self.try_enqueue_cachedir_tag(&entry);
- Some(new_entry(entry.path()))
+ Some(new_entry(entry.path(), false))
}
}
})
}
}
-fn new_entry(path: &Path) -> Result<FilesystemEntry, FsIterError> {
+fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result<AnnotatedFsEntry, FsIterError> {
let meta = std::fs::symlink_metadata(path);
debug!("metadata for {:?}: {:?}", path, meta);
let meta = match meta {
@@ -130,5 +137,9 @@ fn new_entry(path: &Path) -> Result<FilesystemEntry, FsIterError> {
};
let entry = FilesystemEntry::from_metadata(path, &meta)?;
debug!("FileSystemEntry for {:?}: {:?}", path, entry);
- Ok(entry)
+ let annotated = AnnotatedFsEntry {
+ inner: entry,
+ is_cachedir_tag,
+ };
+ Ok(annotated)
}
diff --git a/src/generation.rs b/src/generation.rs
index eeb0e76..d770235 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -55,10 +55,11 @@ impl NascentGeneration {
e: FilesystemEntry,
ids: &[ChunkId],
reason: Reason,
+ is_cachedir_tag: bool,
) -> Result<(), NascentError> {
let t = self.conn.transaction().map_err(NascentError::Transaction)?;
self.fileno += 1;
- sql::insert_one(&t, e, self.fileno, ids, reason)?;
+ sql::insert_one(&t, e, self.fileno, ids, reason, is_cachedir_tag)?;
t.commit().map_err(NascentError::Commit)?;
Ok(())
}
@@ -75,9 +76,14 @@ impl NascentGeneration {
debug!("ignoring backup error {}", err);
warnings.push(err);
}
- Ok(FsEntryBackupOutcome { entry, ids, reason }) => {
+ Ok(FsEntryBackupOutcome {
+ entry,
+ ids,
+ reason,
+ is_cachedir_tag,
+ }) => {
self.fileno += 1;
- sql::insert_one(&t, entry, self.fileno, &ids[..], reason)?;
+ sql::insert_one(&t, entry, self.fileno, &ids[..], reason, is_cachedir_tag)?;
}
}
}
@@ -86,23 +92,6 @@ impl NascentGeneration {
}
}
-#[cfg(test)]
-mod test {
- use super::NascentGeneration;
- use tempfile::NamedTempFile;
-
- #[test]
- fn empty() {
- let filename = NamedTempFile::new().unwrap().path().to_path_buf();
- {
- let mut _gen = NascentGeneration::create(&filename).unwrap();
- // _gen is dropped here; the connection is close; the file
- // should not be removed.
- }
- assert!(filename.exists());
- }
-}
-
/// A finished generation.
///
/// A generation is finished when it's on the server. It can be restored.
@@ -216,6 +205,10 @@ impl LocalGeneration {
pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, LocalGenerationError> {
sql::get_fileno(&self.conn, filename)
}
+
+ pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, LocalGenerationError> {
+ sql::is_cachedir_tag(&self.conn, filename)
+ }
}
mod sql {
@@ -234,7 +227,7 @@ mod sql {
let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE;
let conn = Connection::open_with_flags(filename, flags)?;
conn.execute(
- "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT)",
+ "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT, is_cachedir_tag BOOLEAN)",
params![],
)?;
conn.execute(
@@ -260,11 +253,12 @@ mod sql {
fileno: FileId,
ids: &[ChunkId],
reason: Reason,
+ is_cachedir_tag: bool,
) -> Result<(), LocalGenerationError> {
let json = serde_json::to_string(&e)?;
t.execute(
- "INSERT INTO files (fileno, filename, json, reason) VALUES (?1, ?2, ?3, ?4)",
- params![fileno, path_into_blob(&e.pathbuf()), &json, reason,],
+ "INSERT INTO files (fileno, filename, json, reason, is_cachedir_tag) VALUES (?1, ?2, ?3, ?4, ?5)",
+ params![fileno, path_into_blob(&e.pathbuf()), &json, reason, is_cachedir_tag,],
)?;
for id in ids {
t.execute(
@@ -428,4 +422,101 @@ mod sql {
}
}
}
+
+ pub fn is_cachedir_tag(
+ conn: &Connection,
+ filename: &Path,
+ ) -> Result<bool, LocalGenerationError> {
+ let mut stmt = conn.prepare("SELECT is_cachedir_tag FROM files WHERE filename = ?1")?;
+ let mut iter = stmt.query_map(params![path_into_blob(filename)], |row| row.get(0))?;
+ match iter.next() {
+ // File is missing, so it's definitely not a CACHEDIR.TAG
+ None => Ok(false),
+ Some(result) => Ok(result?),
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::{LocalGeneration, NascentGeneration};
+ use tempfile::NamedTempFile;
+
+ #[test]
+ fn empty() {
+ let filename = NamedTempFile::new().unwrap().path().to_path_buf();
+ {
+ let mut _gen = NascentGeneration::create(&filename).unwrap();
+        // _gen is dropped here; the connection is closed; the file
+ // should not be removed.
+ }
+ assert!(filename.exists());
+ }
+
+ #[test]
+ fn remembers_cachedir_tags() {
+ use crate::{
+ backup_reason::Reason, backup_run::FsEntryBackupOutcome, fsentry::FilesystemEntry,
+ };
+ use std::{fs::metadata, mem::drop, path::Path};
+
+ // Create a `Metadata` structure to pass to other functions (we don't care about the
+ // contents)
+ let src_file = NamedTempFile::new().unwrap();
+ let metadata = metadata(src_file.path()).unwrap();
+
+ let dbfile = NamedTempFile::new().unwrap().path().to_path_buf();
+
+ let nontag_path1 = Path::new("/nontag1");
+ let nontag_path2 = Path::new("/dir/nontag2");
+ let tag_path1 = Path::new("/a_tag");
+ let tag_path2 = Path::new("/another_dir/a_tag");
+
+ let mut gen = NascentGeneration::create(&dbfile).unwrap();
+
+ gen.insert(
+ FilesystemEntry::from_metadata(&nontag_path1, &metadata).unwrap(),
+ &[],
+ Reason::IsNew,
+ false,
+ )
+ .unwrap();
+ gen.insert(
+ FilesystemEntry::from_metadata(&tag_path1, &metadata).unwrap(),
+ &[],
+ Reason::IsNew,
+ true,
+ )
+ .unwrap();
+
+ let entries = vec![
+ Ok(FsEntryBackupOutcome {
+ entry: FilesystemEntry::from_metadata(&nontag_path2, &metadata).unwrap(),
+ ids: vec![],
+ reason: Reason::IsNew,
+ is_cachedir_tag: false,
+ }),
+ Ok(FsEntryBackupOutcome {
+ entry: FilesystemEntry::from_metadata(&tag_path2, &metadata).unwrap(),
+ ids: vec![],
+ reason: Reason::IsNew,
+ is_cachedir_tag: true,
+ }),
+ ];
+ gen.insert_iter(entries.into_iter()).unwrap();
+
+ drop(gen);
+
+ let gen = LocalGeneration::open(dbfile).unwrap();
+ assert!(!gen.is_cachedir_tag(nontag_path1).unwrap());
+ assert!(!gen.is_cachedir_tag(nontag_path2).unwrap());
+ assert!(gen.is_cachedir_tag(tag_path1).unwrap());
+ assert!(gen.is_cachedir_tag(tag_path2).unwrap());
+
+ // Nonexistent files are not cachedir tags
+ assert!(!gen.is_cachedir_tag(Path::new("/hello/world")).unwrap());
+ assert!(!gen
+ .is_cachedir_tag(Path::new("/different path/to/another file.txt"))
+ .unwrap());
+ }
}