diff options
author | Lars Wirzenius <liw@liw.fi> | 2021-07-30 09:36:44 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2021-07-30 09:36:44 +0000 |
commit | cc3524fd60169a66c8a4e2178448bf5f6d07c99b (patch) | |
tree | ae7b9c5eaec01aa52e6e0c2aa5b61e9cc66ec2e9 | |
parent | 7f4087758e401791794f0518233ea058355438d3 (diff) | |
parent | de85cf701613d5e59c87431a6ce10b80caacef40 (diff) | |
download | obnam2-cc3524fd60169a66c8a4e2178448bf5f6d07c99b.tar.gz |
Merge branch 'feature/112-cachedir-protection' into 'main'
Backup: exit non-zero if new CACHEDIR.TAGs are found (unless `exclude_cache_tag_directories` is disabled)
Closes #112
See merge request obnam/obnam!168
-rw-r--r-- | obnam.md | 55 | ||||
-rw-r--r-- | src/backup_run.rs | 66 | ||||
-rw-r--r-- | src/cmd/backup.rs | 29 | ||||
-rw-r--r-- | src/error.rs | 5 | ||||
-rw-r--r-- | src/fsiter.rs | 25 | ||||
-rw-r--r-- | src/generation.rs | 137 |
6 files changed, 259 insertions, 58 deletions
@@ -141,9 +141,21 @@ itself says in the "Security Considerations" section: > than the outright deletion of that directory, for example, causing the > contents of that directory to be omitted from regular backups. -For now, the only mitigation is a setting called -`exclude_cache_tag_directories`, which users can disable if they want to avoid -this threat. +This is mitigated in two ways: + +1. if an incremental backup finds a tag which wasn't in the previous backup, + Obnam will show the path to the tag, and exit with a non-zero exit code. That + way, the user has a chance to notice the new tag. The backup itself is still + made, so if the tag is legitimate, the user doesn't need to re-run Obnam. + + Error messages and non-zero exit are jarring, so this approach is not + user-friendly. Better than nothing though; + +2. users can set `exclude_cache_tag_directories` to `false`, which will make + Obnam ignore the tags, nullifying the threat. + + This is a last-ditch solution, since it makes the backups larger and slower + (because Obnam has to back up more data). [CACHEDIR.TAG]: https://bford.info/cachedir/ @@ -1635,11 +1647,40 @@ roots: - live ~~~ -### Can ignore CACHEDIR.TAGs if told to do so +### Incremental backup errors if it finds new CACHEDIR.TAGs + +To mitigate the risk described in the "Threat Model" chapter, Obnam should +notify the user when it finds CACHEDIR.TAG files that aren't present in the +previous backup. Notification is twofold: the path to the tag should be shown, +and the client should exit with a non-zero code. This scenario backs up +a directory (which shouldn't error), then adds a new tag and backs up the +directory again, expecting an error. 
+ +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on client.yaml +and a file live/data1.dat containing some random data +and a file live/data2.dat containing some random data +when I run obnam backup +then exit code is 0 +given a cache directory tag in live/ +when I try to run obnam backup +then exit code is 1 +and stdout contains "live/CACHEDIR.TAG" +when I run obnam list-files +then exit code is 0 +then file live/CACHEDIR.TAG was backed up because it was new +and stdout doesn't contain "live/data1.dat" +and stdout doesn't contain "live/data2.dat" +~~~ + +### Ignore CACHEDIR.TAGs if `exclude_cache_tag_directories` is disabled This scenario verifies that when `exclude_cache_tag_directories` setting is disabled, Obnam client backs up directories even if they -contain [CACHEDIR.TAG][]. +contain [CACHEDIR.TAG][]. It also verifies that incremental backups don't fail when +new tags are added, i.e. the aforementioned mitigation is disabled too. [CACHEDIR.TAG]: https://bford.info/cachedir/ @@ -1656,6 +1697,10 @@ then backup generation is GEN when I invoke obnam restore <GEN> rest given a manifest of the directory live restored in rest in restored.yaml then manifests initial.yaml and restored.yaml match +given a cache directory tag in live/not_ignored +when I run obnam backup +then exit code is 0 +and stdout doesn't contain "live/not_ignored/CACHEDIR.TAG" ~~~ ~~~{#client_includes_cachedirs.yaml .file .yaml .numberLines} diff --git a/src/backup_run.rs b/src/backup_run.rs index 3c03ec3..738830d 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -5,11 +5,11 @@ use crate::client::{BackupClient, ClientError}; use crate::config::ClientConfig; use crate::error::ObnamError; use crate::fsentry::FilesystemEntry; -use crate::fsiter::{FsIterError, FsIterator}; +use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator}; use crate::generation::{LocalGeneration, LocalGenerationError, NascentError, NascentGeneration}; use 
crate::policy::BackupPolicy; use log::{info, warn}; -use std::path::Path; +use std::path::{Path, PathBuf}; pub struct BackupRun<'a> { client: &'a BackupClient, @@ -35,6 +35,17 @@ pub struct FsEntryBackupOutcome { pub entry: FilesystemEntry, pub ids: Vec<ChunkId>, pub reason: Reason, + pub is_cachedir_tag: bool, +} + +#[derive(Debug)] +pub struct RootsBackupOutcome { + /// The number of backed up files. + pub files_count: i64, + /// The errors encountered while backing up files. + pub warnings: Vec<BackupError>, + /// CACHEDIR.TAG files that aren't present in a previous generation. + pub new_cachedir_tags: Vec<PathBuf>, +} impl<'a> BackupRun<'a> { @@ -101,24 +112,38 @@ impl<'a> BackupRun<'a> { config: &ClientConfig, old: &LocalGeneration, newpath: &Path, - ) -> Result<(i64, Vec<BackupError>), NascentError> { - let mut all_warnings = vec![]; - let count = { + ) -> Result<RootsBackupOutcome, NascentError> { + let mut warnings = vec![]; + let mut new_cachedir_tags = vec![]; + let files_count = { let mut new = NascentGeneration::create(newpath)?; for root in &config.roots { let iter = FsIterator::new(root, config.exclude_cache_tag_directories); - let mut warnings = new.insert_iter(iter.map(|entry| self.backup(entry, &old)))?; - all_warnings.append(&mut warnings); + let entries = iter.map(|entry| { + if let Ok(ref entry) = entry { + let path = entry.inner.pathbuf(); + if entry.is_cachedir_tag && !old.is_cachedir_tag(&path)? 
{ + new_cachedir_tags.push(path); + } + }; + self.backup(entry, &old) + }); + let mut new_warnings = new.insert_iter(entries)?; + warnings.append(&mut new_warnings); } new.file_count() }; self.finish(); - Ok((count, all_warnings)) + Ok(RootsBackupOutcome { + files_count, + warnings, + new_cachedir_tags, + }) } pub fn backup( &self, - entry: Result<FilesystemEntry, FsIterError>, + entry: Result<AnnotatedFsEntry, FsIterError>, old: &LocalGeneration, ) -> Result<FsEntryBackupOutcome, BackupError> { match entry { @@ -128,10 +153,10 @@ impl<'a> BackupRun<'a> { Err(BackupError::FsIterError(err)) } Ok(entry) => { - let path = &entry.pathbuf(); + let path = &entry.inner.pathbuf(); info!("backup: {}", path.display()); self.found_live_file(path); - let reason = self.policy.needs_backup(&old, &entry); + let reason = self.policy.needs_backup(&old, &entry.inner); match reason { Reason::IsNew | Reason::Changed @@ -144,7 +169,7 @@ impl<'a> BackupRun<'a> { reason, )), Reason::Unchanged | Reason::Skipped | Reason::FileError => { - let fileno = old.get_fileno(&entry.pathbuf())?; + let fileno = old.get_fileno(&entry.inner.pathbuf())?; let ids = if let Some(fileno) = fileno { let mut ids = vec![]; for id in old.chunkids(fileno)?.iter()? 
{ @@ -154,7 +179,12 @@ impl<'a> BackupRun<'a> { } else { vec![] }; - Ok(FsEntryBackupOutcome { entry, ids, reason }) + Ok(FsEntryBackupOutcome { + entry: entry.inner, + ids, + reason, + is_cachedir_tag: entry.is_cachedir_tag, + }) } } } @@ -172,25 +202,27 @@ impl<'a> BackupRun<'a> { fn backup_file( client: &BackupClient, - entry: &FilesystemEntry, + entry: &AnnotatedFsEntry, path: &Path, chunk_size: usize, reason: Reason, ) -> FsEntryBackupOutcome { - let ids = client.upload_filesystem_entry(&entry, chunk_size); + let ids = client.upload_filesystem_entry(&entry.inner, chunk_size); match ids { Err(err) => { warn!("error backing up {}, skipping it: {}", path.display(), err); FsEntryBackupOutcome { - entry: entry.clone(), + entry: entry.inner.clone(), ids: vec![], reason: Reason::FileError, + is_cachedir_tag: entry.is_cachedir_tag, } } Ok(ids) => FsEntryBackupOutcome { - entry: entry.clone(), + entry: entry.inner.clone(), ids, reason, + is_cachedir_tag: entry.is_cachedir_tag, }, } } diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index d574b96..04dfb05 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -26,30 +26,47 @@ impl Backup { let oldtemp = NamedTempFile::new()?; let newtemp = NamedTempFile::new()?; - let (count, warnings) = match genlist.resolve("latest") { + let (is_incremental, outcome) = match genlist.resolve("latest") { Err(_) => { info!("fresh backup without a previous generation"); let mut run = BackupRun::initial(config, &client)?; let old = run.start(None, oldtemp.path())?; - run.backup_roots(config, &old, newtemp.path())? + (false, run.backup_roots(config, &old, newtemp.path())?) } Ok(old_id) => { info!("incremental backup based on {}", old_id); let mut run = BackupRun::incremental(config, &client)?; let old = run.start(Some(&old_id), oldtemp.path())?; - run.backup_roots(config, &old, newtemp.path())? + (true, run.backup_roots(config, &old, newtemp.path())?) 
} }; let gen_id = upload_nascent_generation(&client, newtemp.path())?; - for w in warnings.iter() { + for w in outcome.warnings.iter() { println!("warning: {}", w); } - report_stats(&runtime, count, &gen_id, warnings.len())?; + if is_incremental && !outcome.new_cachedir_tags.is_empty() { + println!("New CACHEDIR.TAG files since the last backup:"); + for t in &outcome.new_cachedir_tags { + println!("- {:?}", t); + } + println!("You can configure Obnam to ignore all such files by setting `exclude_cache_tag_directories` to `false`."); + } - Ok(()) + report_stats( + &runtime, + outcome.files_count, + &gen_id, + outcome.warnings.len(), + )?; + + if is_incremental && !outcome.new_cachedir_tags.is_empty() { + Err(ObnamError::NewCachedirTagsFound) + } else { + Ok(()) + } } } diff --git a/src/error.rs b/src/error.rs index e4d77d3..30571ec 100644 --- a/src/error.rs +++ b/src/error.rs @@ -52,4 +52,9 @@ pub enum ObnamError { #[error(transparent)] SerdeJsonError(#[from] serde_json::Error), + + #[error( + "found CACHEDIR.TAG files that aren't present in the previous backup, might be an attack" + )] + NewCachedirTagsFound, } diff --git a/src/fsiter.rs b/src/fsiter.rs index aea9078..2325793 100644 --- a/src/fsiter.rs +++ b/src/fsiter.rs @@ -3,6 +3,13 @@ use log::{debug, warn}; use std::path::{Path, PathBuf}; use walkdir::{DirEntry, IntoIter, WalkDir}; +/// Filesystem entry along with additional info about it. +pub struct AnnotatedFsEntry { + pub inner: FilesystemEntry, + /// Is `entry` a valid CACHEDIR.TAG? + pub is_cachedir_tag: bool, +} + /// Iterator over file system entries in a directory tree. 
pub struct FsIterator { iter: SkipCachedirs, @@ -32,7 +39,7 @@ impl FsIterator { } impl Iterator for FsIterator { - type Item = Result<FilesystemEntry, FsIterError>; + type Item = Result<AnnotatedFsEntry, FsIterError>; fn next(&mut self) -> Option<Self::Item> { self.iter.next() } @@ -45,7 +52,7 @@ struct SkipCachedirs { exclude_cache_tag_directories: bool, // This is the last tag we've found. `next()` will yield it before asking `iter` for more // entries. - cachedir_tag: Option<Result<FilesystemEntry, FsIterError>>, + cachedir_tag: Option<Result<AnnotatedFsEntry, FsIterError>>, } impl SkipCachedirs { @@ -94,13 +101,13 @@ impl SkipCachedirs { if content == CACHEDIR_TAG { self.iter.skip_current_dir(); - self.cachedir_tag = Some(new_entry(&tag_path)); + self.cachedir_tag = Some(new_entry(&tag_path, true)); } } } impl Iterator for SkipCachedirs { - type Item = Result<FilesystemEntry, FsIterError>; + type Item = Result<AnnotatedFsEntry, FsIterError>; fn next(&mut self) -> Option<Self::Item> { self.cachedir_tag.take().or_else(|| { @@ -111,14 +118,14 @@ impl Iterator for SkipCachedirs { Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))), Some(Ok(entry)) => { self.try_enqueue_cachedir_tag(&entry); - Some(new_entry(entry.path())) + Some(new_entry(entry.path(), false)) } } }) } } -fn new_entry(path: &Path) -> Result<FilesystemEntry, FsIterError> { +fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result<AnnotatedFsEntry, FsIterError> { let meta = std::fs::symlink_metadata(path); debug!("metadata for {:?}: {:?}", path, meta); let meta = match meta { @@ -130,5 +137,9 @@ fn new_entry(path: &Path) -> Result<FilesystemEntry, FsIterError> { }; let entry = FilesystemEntry::from_metadata(path, &meta)?; debug!("FileSystemEntry for {:?}: {:?}", path, entry); - Ok(entry) + let annotated = AnnotatedFsEntry { + inner: entry, + is_cachedir_tag, + }; + Ok(annotated) } diff --git a/src/generation.rs b/src/generation.rs index eeb0e76..d770235 100644 --- a/src/generation.rs +++ 
b/src/generation.rs @@ -55,10 +55,11 @@ impl NascentGeneration { e: FilesystemEntry, ids: &[ChunkId], reason: Reason, + is_cachedir_tag: bool, ) -> Result<(), NascentError> { let t = self.conn.transaction().map_err(NascentError::Transaction)?; self.fileno += 1; - sql::insert_one(&t, e, self.fileno, ids, reason)?; + sql::insert_one(&t, e, self.fileno, ids, reason, is_cachedir_tag)?; t.commit().map_err(NascentError::Commit)?; Ok(()) } @@ -75,9 +76,14 @@ impl NascentGeneration { debug!("ignoring backup error {}", err); warnings.push(err); } - Ok(FsEntryBackupOutcome { entry, ids, reason }) => { + Ok(FsEntryBackupOutcome { + entry, + ids, + reason, + is_cachedir_tag, + }) => { self.fileno += 1; - sql::insert_one(&t, entry, self.fileno, &ids[..], reason)?; + sql::insert_one(&t, entry, self.fileno, &ids[..], reason, is_cachedir_tag)?; } } } @@ -86,23 +92,6 @@ impl NascentGeneration { } } -#[cfg(test)] -mod test { - use super::NascentGeneration; - use tempfile::NamedTempFile; - - #[test] - fn empty() { - let filename = NamedTempFile::new().unwrap().path().to_path_buf(); - { - let mut _gen = NascentGeneration::create(&filename).unwrap(); - // _gen is dropped here; the connection is close; the file - // should not be removed. - } - assert!(filename.exists()); - } -} - /// A finished generation. /// /// A generation is finished when it's on the server. It can be restored. 
@@ -216,6 +205,10 @@ impl LocalGeneration { pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, LocalGenerationError> { sql::get_fileno(&self.conn, filename) } + + pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, LocalGenerationError> { + sql::is_cachedir_tag(&self.conn, filename) + } } mod sql { @@ -234,7 +227,7 @@ mod sql { let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE; let conn = Connection::open_with_flags(filename, flags)?; conn.execute( - "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT)", + "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT, is_cachedir_tag BOOLEAN)", params![], )?; conn.execute( @@ -260,11 +253,12 @@ mod sql { fileno: FileId, ids: &[ChunkId], reason: Reason, + is_cachedir_tag: bool, ) -> Result<(), LocalGenerationError> { let json = serde_json::to_string(&e)?; t.execute( - "INSERT INTO files (fileno, filename, json, reason) VALUES (?1, ?2, ?3, ?4)", - params![fileno, path_into_blob(&e.pathbuf()), &json, reason,], + "INSERT INTO files (fileno, filename, json, reason, is_cachedir_tag) VALUES (?1, ?2, ?3, ?4, ?5)", + params![fileno, path_into_blob(&e.pathbuf()), &json, reason, is_cachedir_tag,], )?; for id in ids { t.execute( @@ -428,4 +422,101 @@ mod sql { } } } + + pub fn is_cachedir_tag( + conn: &Connection, + filename: &Path, + ) -> Result<bool, LocalGenerationError> { + let mut stmt = conn.prepare("SELECT is_cachedir_tag FROM files WHERE filename = ?1")?; + let mut iter = stmt.query_map(params![path_into_blob(filename)], |row| row.get(0))?; + match iter.next() { + // File is missing, so it's definitely not a CACHEDIR.TAG + None => Ok(false), + Some(result) => Ok(result?), + } + } +} + +#[cfg(test)] +mod test { + use super::{LocalGeneration, NascentGeneration}; + use tempfile::NamedTempFile; + + #[test] + fn empty() { + let filename = NamedTempFile::new().unwrap().path().to_path_buf(); + { + let mut _gen = 
NascentGeneration::create(&filename).unwrap(); + // _gen is dropped here; the connection is close; the file + // should not be removed. + } + assert!(filename.exists()); + } + + #[test] + fn remembers_cachedir_tags() { + use crate::{ + backup_reason::Reason, backup_run::FsEntryBackupOutcome, fsentry::FilesystemEntry, + }; + use std::{fs::metadata, mem::drop, path::Path}; + + // Create a `Metadata` structure to pass to other functions (we don't care about the + // contents) + let src_file = NamedTempFile::new().unwrap(); + let metadata = metadata(src_file.path()).unwrap(); + + let dbfile = NamedTempFile::new().unwrap().path().to_path_buf(); + + let nontag_path1 = Path::new("/nontag1"); + let nontag_path2 = Path::new("/dir/nontag2"); + let tag_path1 = Path::new("/a_tag"); + let tag_path2 = Path::new("/another_dir/a_tag"); + + let mut gen = NascentGeneration::create(&dbfile).unwrap(); + + gen.insert( + FilesystemEntry::from_metadata(&nontag_path1, &metadata).unwrap(), + &[], + Reason::IsNew, + false, + ) + .unwrap(); + gen.insert( + FilesystemEntry::from_metadata(&tag_path1, &metadata).unwrap(), + &[], + Reason::IsNew, + true, + ) + .unwrap(); + + let entries = vec![ + Ok(FsEntryBackupOutcome { + entry: FilesystemEntry::from_metadata(&nontag_path2, &metadata).unwrap(), + ids: vec![], + reason: Reason::IsNew, + is_cachedir_tag: false, + }), + Ok(FsEntryBackupOutcome { + entry: FilesystemEntry::from_metadata(&tag_path2, &metadata).unwrap(), + ids: vec![], + reason: Reason::IsNew, + is_cachedir_tag: true, + }), + ]; + gen.insert_iter(entries.into_iter()).unwrap(); + + drop(gen); + + let gen = LocalGeneration::open(dbfile).unwrap(); + assert!(!gen.is_cachedir_tag(nontag_path1).unwrap()); + assert!(!gen.is_cachedir_tag(nontag_path2).unwrap()); + assert!(gen.is_cachedir_tag(tag_path1).unwrap()); + assert!(gen.is_cachedir_tag(tag_path2).unwrap()); + + // Nonexistent files are not cachedir tags + assert!(!gen.is_cachedir_tag(Path::new("/hello/world")).unwrap()); + 
assert!(!gen + .is_cachedir_tag(Path::new("/different path/to/another file.txt")) + .unwrap()); + } } |