From c11b8bb76c50cff6aa481f17907ae7200ac55c01 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 4 Jan 2021 13:30:21 +0200 Subject: feat! record whether file was backed up and why, in a generation This changes SQL schema. --- obnam.md | 57 +++++++++++++++++++++++++++++++++++++++ src/bin/obnam.rs | 7 ++++- src/cmd/backup.rs | 74 +++++++++++++++++++++++++++++++++++++++------------ src/cmd/list_files.rs | 42 +++++++++++++++++++++++++++++ src/cmd/mod.rs | 5 +++- src/cmd/restore.rs | 4 +-- src/generation.rs | 53 +++++++++++++++++++++--------------- subplot/client.py | 27 +++++++++++++++++++ subplot/client.yaml | 9 +++++++ 9 files changed, 235 insertions(+), 43 deletions(-) create mode 100644 src/cmd/list_files.rs diff --git a/obnam.md b/obnam.md index 7fa902f..0c4ab50 100644 --- a/obnam.md +++ b/obnam.md @@ -919,6 +919,63 @@ given a manifest of the directory live restored in rest in rest.yaml then files live.yaml and rest.yaml match ~~~ +## Backup or not for the right reason + +The decision of whether to back up a file or keep the version in the +previous backup is crucial. This scenario verifies that Obnam makes +the right decisions. + +### First backup backs up all files because they're new + +This scenario verifies that in the first backup all files are backed +up because they were new. + +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on smoke.yaml +and a file live/data.dat containing some random data +and a manifest of the directory live in live.yaml +when I run obnam --config smoke.yaml backup +when I run obnam --config smoke.yaml list-files +then file live/data.dat was backed up because it was new +~~~ + +### All files in second backup are unchanged + +This scenario verifies that if a file hasn't been changed, it's not +backed up. 
+ +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on smoke.yaml +and a file live/data.dat containing some random data +and a manifest of the directory live in live.yaml +when I run obnam --config smoke.yaml backup +when I run obnam --config smoke.yaml backup +when I run obnam --config smoke.yaml list-files +then file live/data.dat was not backed up because it was unchanged +~~~ + +### Second backup backs up changed file + +This scenario verifies that if a file has indeed been changed, it's +backed up. + +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on smoke.yaml +and a file live/data.dat containing some random data +and a manifest of the directory live in live.yaml +when I run obnam --config smoke.yaml backup +given a file live/data.dat containing some random data +when I run obnam --config smoke.yaml backup +when I run obnam --config smoke.yaml list-files +then file live/data.dat was backed up because it was changed +~~~ + ## Tricky filenames Obnam needs to handle all filenames the underlying operating and file diff --git a/src/bin/obnam.rs b/src/bin/obnam.rs index 3ee7f56..f31884b 100644 --- a/src/bin/obnam.rs +++ b/src/bin/obnam.rs @@ -2,7 +2,7 @@ use log::{debug, error, info, LevelFilter}; use log4rs::append::file::FileAppender; use log4rs::config::{Appender, Config, Logger, Root}; use obnam::client::ClientConfig; -use obnam::cmd::{backup, list, restore}; +use obnam::cmd::{backup, list, list_files, restore}; use std::path::{Path, PathBuf}; use structopt::StructOpt; @@ -21,6 +21,7 @@ fn main() -> anyhow::Result<()> { let result = match opt.cmd { Command::Backup => backup(&config, BUFFER_SIZE), Command::List => list(&config), + Command::ListFiles { gen_id } => list_files(&config, &gen_id), Command::Restore { gen_id, to } => restore(&config, &gen_id, &to), }; @@ -48,6 +49,10 @@ struct Opt { enum Command { Backup, List, + ListFiles { + #[structopt(default_value = "latest")] 
+ gen_id: String, + }, Restore { #[structopt()] gen_id: String, diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index 4d13fe7..1521cab 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -4,6 +4,9 @@ use crate::fsiter::FsIterator; use crate::generation::{LocalGeneration, NascentGeneration}; use indicatif::{ProgressBar, ProgressStyle}; use log::{debug, info}; +use rusqlite::types::ToSqlOutput; +use rusqlite::ToSql; +use std::fmt; use tempfile::NamedTempFile; pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { @@ -43,7 +46,9 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { let path = &entry.pathbuf(); info!("backup: {}", path.display()); progress.set_message(&format!("{}", path.display())); - client.upload_filesystem_entry(entry, buffer_size) + let (new_entry, ids) = + client.upload_filesystem_entry(entry, buffer_size)?; + Ok((new_entry, ids, Reason::IsNew)) } } }))?; @@ -60,16 +65,22 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { let path = &entry.pathbuf(); info!("backup: {}", path.display()); progress.set_message(&format!("{}", path.display())); - if needs_backup(&old, &entry) { - client.upload_filesystem_entry(entry, buffer_size) - } else { - let fileno = old.get_fileno(&entry.pathbuf())?; - let ids = if let Some(fileno) = fileno { - old.chunkids(fileno)? - } else { - vec![] - }; - Ok((entry.clone(), ids)) + let reason = needs_backup(&old, &entry); + match reason { + Reason::IsNew | Reason::Changed | Reason::Error => { + let (new_entry, ids) = + client.upload_filesystem_entry(entry, buffer_size)?; + Ok((new_entry, ids, reason)) + } + Reason::Unchanged => { + let fileno = old.get_fileno(&entry.pathbuf())?; + let ids = if let Some(fileno) = fileno { + old.chunkids(fileno)? 
+ } else { + vec![] + }; + Ok((entry.clone(), ids, Reason::Unchanged)) + } } } } @@ -85,13 +96,42 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { let gen_id = client.upload_generation(&newname, buffer_size)?; println!("gen id: {}", gen_id); - // Delete the temporary file. + // Delete the temporary file. std::fs::remove_file(&newname)?; std::fs::remove_file(&oldname)?; Ok(()) } +#[derive(Debug)] +pub enum Reason { + IsNew, + Changed, + Unchanged, + Error, +} + +impl ToSql for Reason { + fn to_sql(&self) -> rusqlite::Result { + Ok(ToSqlOutput::Owned(rusqlite::types::Value::Text(format!( + "{}", + self + )))) + } +} + +impl fmt::Display for Reason { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let reason = match self { + Reason::IsNew => "new", + Reason::Changed => "changed", + Reason::Unchanged => "unchanged", + Reason::Error => "error", + }; + write!(f, "{}", reason) + } +} + fn create_progress_bar(verbose: bool) -> ProgressBar { let progress = if verbose { ProgressBar::new(0) @@ -109,7 +149,7 @@ fn create_progress_bar(verbose: bool) -> ProgressBar { progress } -fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool { +fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> Reason { let new_name = new_entry.pathbuf(); match old.get_file(&new_name) { // File is not in old generation. @@ -118,17 +158,17 @@ fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool { "needs_backup: file is not in old generation, needs backup: {:?}", new_name ); - true + Reason::IsNew } // File is in old generation. Has its metadata changed? 
Ok(Some(old_entry)) => { if file_has_changed(&old_entry, new_entry) { debug!("needs_backup: file has changed: {:?}", new_name); - true + Reason::Changed } else { debug!("needs_backup: file has NOT changed: {:?}", new_name); - false + Reason::Unchanged } } @@ -139,7 +179,7 @@ fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool { "needs_backup: lookup in old generation returned error, ignored: {:?}: {}", new_name, err ); - true + Reason::Error } } } diff --git a/src/cmd/list_files.rs b/src/cmd/list_files.rs new file mode 100644 index 0000000..aa4bed0 --- /dev/null +++ b/src/cmd/list_files.rs @@ -0,0 +1,42 @@ +use crate::client::BackupClient; +use crate::client::ClientConfig; +use crate::error::ObnamError; +use crate::fsentry::{FilesystemEntry, FilesystemKind}; +use tempfile::NamedTempFile; + +pub fn list_files(config: &ClientConfig, gen_ref: &str) -> anyhow::Result<()> { + // Create a named temporary file. We don't need the open file + // handle, so we discard that. + let dbname = { + let temp = NamedTempFile::new()?; + let (_, dbname) = temp.keep()?; + dbname + }; + + let client = BackupClient::new(&config.server_url)?; + + let genlist = client.list_generations()?; + let gen_id: String = match genlist.resolve(gen_ref) { + None => return Err(ObnamError::UnknownGeneration(gen_ref.to_string()).into()), + Some(id) => id, + }; + + let gen = client.fetch_generation(&gen_id, &dbname)?; + for (_, entry, reason) in gen.files()? { + println!("{}", format_entry(&entry, &reason)); + } + + // Delete the temporary file. 
+ std::fs::remove_file(&dbname)?; + + Ok(()) +} + +fn format_entry(e: &FilesystemEntry, reason: &str) -> String { + let kind = match e.kind() { + FilesystemKind::Regular => "-", + FilesystemKind::Directory => "d", + FilesystemKind::Symlink => "l", + }; + format!("{} {} ({})", kind, e.pathbuf().display(), reason) +} diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index ca5ff42..2919d88 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -1,8 +1,11 @@ mod backup; -pub use backup::backup; +pub use backup::{backup, Reason}; mod list; pub use list::list; +mod list_files; +pub use list_files::list_files; + pub mod restore; pub use restore::restore; diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs index 53e168a..0efdffb 100644 --- a/src/cmd/restore.rs +++ b/src/cmd/restore.rs @@ -35,10 +35,10 @@ pub fn restore(config: &ClientConfig, gen_ref: &str, to: &Path) -> anyhow::Resul let gen = client.fetch_generation(&gen_id, &dbname)?; info!("restore file count: {}", gen.file_count()?); let progress = create_progress_bar(gen.file_count()?, true); - for (fileid, entry) in gen.files()? { + for (fileid, entry, _) in gen.files()? { restore_generation(&client, &gen, fileid, &entry, &to, &progress)?; } - for (_, entry) in gen.files()? { + for (_, entry, _) in gen.files()? 
{ if entry.is_dir() { restore_directory_metadata(&entry, &to)?; } diff --git a/src/generation.rs b/src/generation.rs index b55e244..c91702f 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -1,4 +1,5 @@ use crate::chunkid::ChunkId; +use crate::cmd::Reason; use crate::fsentry::FilesystemEntry; use rusqlite::Connection; use std::path::Path; @@ -26,23 +27,28 @@ impl NascentGeneration { self.fileno } - pub fn insert(&mut self, e: FilesystemEntry, ids: &[ChunkId]) -> anyhow::Result<()> { + pub fn insert( + &mut self, + e: FilesystemEntry, + ids: &[ChunkId], + reason: Reason, + ) -> anyhow::Result<()> { let t = self.conn.transaction()?; self.fileno += 1; - sql::insert_one(&t, e, self.fileno, ids)?; + sql::insert_one(&t, e, self.fileno, ids, reason)?; t.commit()?; Ok(()) } - pub fn insert_iter( + pub fn insert_iter<'a>( &mut self, - entries: impl Iterator)>>, + entries: impl Iterator, Reason)>>, ) -> anyhow::Result<()> { let t = self.conn.transaction()?; for r in entries { - let (e, ids) = r?; + let (e, ids, reason) = r?; self.fileno += 1; - sql::insert_one(&t, e, self.fileno, &ids[..])?; + sql::insert_one(&t, e, self.fileno, &ids[..], reason)?; } t.commit()?; Ok(()) @@ -114,7 +120,7 @@ impl LocalGeneration { Ok(sql::file_count(&self.conn)?) } - pub fn files(&self) -> anyhow::Result> { + pub fn files(&self) -> anyhow::Result> { Ok(sql::files(&self.conn)?) 
} @@ -133,6 +139,7 @@ impl LocalGeneration { mod sql { use crate::chunkid::ChunkId; + use crate::cmd::Reason; use crate::error::ObnamError; use crate::fsentry::FilesystemEntry; use rusqlite::{params, Connection, OpenFlags, Row, Transaction}; @@ -143,7 +150,7 @@ mod sql { let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE; let conn = Connection::open_with_flags(filename, flags)?; conn.execute( - "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT)", + "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT)", params![], )?; conn.execute( @@ -168,11 +175,12 @@ mod sql { e: FilesystemEntry, fileno: i64, ids: &[ChunkId], + reason: Reason, ) -> anyhow::Result<()> { let json = serde_json::to_string(&e)?; t.execute( - "INSERT INTO files (fileno, filename, json) VALUES (?1, ?2, ?3)", - params![fileno, path_into_blob(&e.pathbuf()), &json], + "INSERT INTO files (fileno, filename, json, reason) VALUES (?1, ?2, ?3, ?4)", + params![fileno, path_into_blob(&e.pathbuf()), &json, reason,], )?; for id in ids { t.execute( @@ -187,10 +195,11 @@ mod sql { path.as_os_str().as_bytes().to_vec() } - pub fn row_to_entry(row: &Row) -> rusqlite::Result<(i64, String)> { + pub fn row_to_entry(row: &Row) -> rusqlite::Result<(i64, String, String)> { let fileno: i64 = row.get(row.column_index("fileno")?)?; let json: String = row.get(row.column_index("json")?)?; - Ok((fileno, json)) + let reason: String = row.get(row.column_index("reason")?)?; + Ok((fileno, json, reason)) } pub fn file_count(conn: &Connection) -> anyhow::Result { @@ -201,14 +210,14 @@ mod sql { Ok(count) } - pub fn files(conn: &Connection) -> anyhow::Result> { + pub fn files(conn: &Connection) -> anyhow::Result> { let mut stmt = conn.prepare("SELECT * FROM files")?; let iter = stmt.query_map(params![], |row| row_to_entry(row))?; - let mut files: Vec<(i64, FilesystemEntry)> = vec![]; + let mut files: Vec<(i64, FilesystemEntry, String)> = vec![]; for x in 
iter { - let (fileno, json) = x?; + let (fileno, json, reason) = x?; let entry = serde_json::from_str(&json)?; - files.push((fileno, entry)); + files.push((fileno, entry, reason)); } Ok(files) } @@ -228,31 +237,31 @@ mod sql { pub fn get_file(conn: &Connection, filename: &Path) -> anyhow::Result> { match get_file_and_fileno(conn, filename)? { None => Ok(None), - Some((_, e)) => Ok(Some(e)), + Some((_, e, _)) => Ok(Some(e)), } } pub fn get_fileno(conn: &Connection, filename: &Path) -> anyhow::Result> { match get_file_and_fileno(conn, filename)? { None => Ok(None), - Some((id, _)) => Ok(Some(id)), + Some((id, _, _)) => Ok(Some(id)), } } fn get_file_and_fileno( conn: &Connection, filename: &Path, - ) -> anyhow::Result> { - let mut stmt = conn.prepare("SELECT fileno, json FROM files WHERE filename = ?1")?; + ) -> anyhow::Result> { + let mut stmt = conn.prepare("SELECT * FROM files WHERE filename = ?1")?; let mut iter = stmt.query_map(params![path_into_blob(filename)], |row| row_to_entry(row))?; match iter.next() { None => Ok(None), Some(Err(e)) => Err(e.into()), - Some(Ok((fileno, json))) => { + Some(Ok((fileno, json, reason))) => { let entry = serde_json::from_str(&json)?; if iter.next() == None { - Ok(Some((fileno, entry))) + Ok(Some((fileno, entry, reason))) } else { Err(ObnamError::TooManyFiles(filename.to_path_buf()).into()) } diff --git a/subplot/client.py b/subplot/client.py index 0a09d31..c1f5159 100644 --- a/subplot/client.py +++ b/subplot/client.py @@ -68,3 +68,30 @@ def generation_list_contains(ctx, gen_id=None): runcmd_stdout_contains = globals()["runcmd_stdout_contains"] gen_id = ctx["vars"][gen_id] runcmd_stdout_contains(ctx, text=gen_id) + + +def file_was_new(ctx, filename=None): + assert_eq = globals()["assert_eq"] + reason = get_backup_reason(ctx, filename) + assert_eq(reason, "(new)") + + +def file_was_changed(ctx, filename=None): + assert_eq = globals()["assert_eq"] + reason = get_backup_reason(ctx, filename) + assert_eq(reason, "(changed)") + + +def 
file_was_unchanged(ctx, filename=None): + assert_eq = globals()["assert_eq"] + reason = get_backup_reason(ctx, filename) + assert_eq(reason, "(unchanged)") + + +def get_backup_reason(ctx, filename): + runcmd_get_stdout = globals()["runcmd_get_stdout"] + stdout = runcmd_get_stdout(ctx) + lines = stdout.splitlines() + lines = [line for line in lines if filename in line] + line = lines[0] + return line.split()[-1] diff --git a/subplot/client.yaml b/subplot/client.yaml index e526304..db55679 100644 --- a/subplot/client.yaml +++ b/subplot/client.yaml @@ -15,3 +15,12 @@ - then: "generation list contains <{gen_id}>" function: generation_list_contains + +- then: "file {filename} was backed up because it was new" + function: file_was_new + +- then: "file {filename} was backed up because it was changed" + function: file_was_changed + +- then: "file {filename} was not backed up because it was unchanged" + function: file_was_unchanged -- cgit v1.2.1