summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2021-01-04 13:30:21 +0200
committer Lars Wirzenius <liw@liw.fi> 2021-01-04 15:02:33 +0200
commit c11b8bb76c50cff6aa481f17907ae7200ac55c01 (patch)
tree f275d30b15891a71f11d8012f0573268310567bf
parent f73b2a919dc2fe2d92eef6df9b7cab25af083fb3 (diff)
download obnam2-c11b8bb76c50cff6aa481f17907ae7200ac55c01.tar.gz
feat! record whether file was backed up and why, in a generation
This changes SQL schema.
-rw-r--r-- obnam.md 57
-rw-r--r-- src/bin/obnam.rs 7
-rw-r--r-- src/cmd/backup.rs 74
-rw-r--r-- src/cmd/list_files.rs 42
-rw-r--r-- src/cmd/mod.rs 5
-rw-r--r-- src/cmd/restore.rs 4
-rw-r--r-- src/generation.rs 53
-rw-r--r-- subplot/client.py 27
-rw-r--r-- subplot/client.yaml 9
9 files changed, 235 insertions, 43 deletions
diff --git a/obnam.md b/obnam.md
index 7fa902f..0c4ab50 100644
--- a/obnam.md
+++ b/obnam.md
@@ -919,6 +919,63 @@ given a manifest of the directory live restored in rest in rest.yaml
then files live.yaml and rest.yaml match
~~~
+## Backup or not for the right reason
+
+The decision of whether to back up a file or keep the version in the
+previous backup is crucial. This scenario verifies that Obnam makes
+the right decisions.
+
+### First backup backs up all files because they're new
+
+This scenario verifies that in the first backup all files are backed
+up because they were new.
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on smoke.yaml
+and a file live/data.dat containing some random data
+and a manifest of the directory live in live.yaml
+when I run obnam --config smoke.yaml backup
+when I run obnam --config smoke.yaml list-files
+then file live/data.dat was backed up because it was new
+~~~
+
+### All files in second backup are unchanged
+
+This scenario verifies that if a file hasn't been changed, it's not
+backed up.
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on smoke.yaml
+and a file live/data.dat containing some random data
+and a manifest of the directory live in live.yaml
+when I run obnam --config smoke.yaml backup
+when I run obnam --config smoke.yaml backup
+when I run obnam --config smoke.yaml list-files
+then file live/data.dat was not backed up because it was unchanged
+~~~
+
+### Second backup backs up changed file
+
+This scenario verifies that if a file has indeed been changed, it's
+backed up.
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a client config based on smoke.yaml
+and a file live/data.dat containing some random data
+and a manifest of the directory live in live.yaml
+when I run obnam --config smoke.yaml backup
+given a file live/data.dat containing some random data
+when I run obnam --config smoke.yaml backup
+when I run obnam --config smoke.yaml list-files
+then file live/data.dat was backed up because it was changed
+~~~
+
## Tricky filenames
Obnam needs to handle all filenames the underlying operating and file
diff --git a/src/bin/obnam.rs b/src/bin/obnam.rs
index 3ee7f56..f31884b 100644
--- a/src/bin/obnam.rs
+++ b/src/bin/obnam.rs
@@ -2,7 +2,7 @@ use log::{debug, error, info, LevelFilter};
use log4rs::append::file::FileAppender;
use log4rs::config::{Appender, Config, Logger, Root};
use obnam::client::ClientConfig;
-use obnam::cmd::{backup, list, restore};
+use obnam::cmd::{backup, list, list_files, restore};
use std::path::{Path, PathBuf};
use structopt::StructOpt;
@@ -21,6 +21,7 @@ fn main() -> anyhow::Result<()> {
let result = match opt.cmd {
Command::Backup => backup(&config, BUFFER_SIZE),
Command::List => list(&config),
+ Command::ListFiles { gen_id } => list_files(&config, &gen_id),
Command::Restore { gen_id, to } => restore(&config, &gen_id, &to),
};
@@ -48,6 +49,10 @@ struct Opt {
enum Command {
Backup,
List,
+ ListFiles {
+ #[structopt(default_value = "latest")]
+ gen_id: String,
+ },
Restore {
#[structopt()]
gen_id: String,
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index 4d13fe7..1521cab 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -4,6 +4,9 @@ use crate::fsiter::FsIterator;
use crate::generation::{LocalGeneration, NascentGeneration};
use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, info};
+use rusqlite::types::ToSqlOutput;
+use rusqlite::ToSql;
+use std::fmt;
use tempfile::NamedTempFile;
pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> {
@@ -43,7 +46,9 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> {
let path = &entry.pathbuf();
info!("backup: {}", path.display());
progress.set_message(&format!("{}", path.display()));
- client.upload_filesystem_entry(entry, buffer_size)
+ let (new_entry, ids) =
+ client.upload_filesystem_entry(entry, buffer_size)?;
+ Ok((new_entry, ids, Reason::IsNew))
}
}
}))?;
@@ -60,16 +65,22 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> {
let path = &entry.pathbuf();
info!("backup: {}", path.display());
progress.set_message(&format!("{}", path.display()));
- if needs_backup(&old, &entry) {
- client.upload_filesystem_entry(entry, buffer_size)
- } else {
- let fileno = old.get_fileno(&entry.pathbuf())?;
- let ids = if let Some(fileno) = fileno {
- old.chunkids(fileno)?
- } else {
- vec![]
- };
- Ok((entry.clone(), ids))
+ let reason = needs_backup(&old, &entry);
+ match reason {
+ Reason::IsNew | Reason::Changed | Reason::Error => {
+ let (new_entry, ids) =
+ client.upload_filesystem_entry(entry, buffer_size)?;
+ Ok((new_entry, ids, reason))
+ }
+ Reason::Unchanged => {
+ let fileno = old.get_fileno(&entry.pathbuf())?;
+ let ids = if let Some(fileno) = fileno {
+ old.chunkids(fileno)?
+ } else {
+ vec![]
+ };
+ Ok((entry.clone(), ids, Reason::Unchanged))
+ }
}
}
}
@@ -85,13 +96,42 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> {
let gen_id = client.upload_generation(&newname, buffer_size)?;
println!("gen id: {}", gen_id);
- // Delete the temporary file.
+ // Delete the temporary file.
std::fs::remove_file(&newname)?;
std::fs::remove_file(&oldname)?;
Ok(())
}
+#[derive(Debug)]
+pub enum Reason {
+ IsNew,
+ Changed,
+ Unchanged,
+ Error,
+}
+
+impl ToSql for Reason {
+ fn to_sql(&self) -> rusqlite::Result<ToSqlOutput> {
+ Ok(ToSqlOutput::Owned(rusqlite::types::Value::Text(format!(
+ "{}",
+ self
+ ))))
+ }
+}
+
+impl fmt::Display for Reason {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let reason = match self {
+ Reason::IsNew => "new",
+ Reason::Changed => "changed",
+ Reason::Unchanged => "unchanged",
+ Reason::Error => "error",
+ };
+ write!(f, "{}", reason)
+ }
+}
+
fn create_progress_bar(verbose: bool) -> ProgressBar {
let progress = if verbose {
ProgressBar::new(0)
@@ -109,7 +149,7 @@ fn create_progress_bar(verbose: bool) -> ProgressBar {
progress
}
-fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool {
+fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> Reason {
let new_name = new_entry.pathbuf();
match old.get_file(&new_name) {
// File is not in old generation.
@@ -118,17 +158,17 @@ fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool {
"needs_backup: file is not in old generation, needs backup: {:?}",
new_name
);
- true
+ Reason::IsNew
}
// File is in old generation. Has its metadata changed?
Ok(Some(old_entry)) => {
if file_has_changed(&old_entry, new_entry) {
debug!("needs_backup: file has changed: {:?}", new_name);
- true
+ Reason::Changed
} else {
debug!("needs_backup: file has NOT changed: {:?}", new_name);
- false
+ Reason::Unchanged
}
}
@@ -139,7 +179,7 @@ fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool {
"needs_backup: lookup in old generation returned error, ignored: {:?}: {}",
new_name, err
);
- true
+ Reason::Error
}
}
}
diff --git a/src/cmd/list_files.rs b/src/cmd/list_files.rs
new file mode 100644
index 0000000..aa4bed0
--- /dev/null
+++ b/src/cmd/list_files.rs
@@ -0,0 +1,42 @@
+use crate::client::BackupClient;
+use crate::client::ClientConfig;
+use crate::error::ObnamError;
+use crate::fsentry::{FilesystemEntry, FilesystemKind};
+use tempfile::NamedTempFile;
+
+pub fn list_files(config: &ClientConfig, gen_ref: &str) -> anyhow::Result<()> {
+ // Create a named temporary file. We don't need the open file
+ // handle, so we discard that.
+ let dbname = {
+ let temp = NamedTempFile::new()?;
+ let (_, dbname) = temp.keep()?;
+ dbname
+ };
+
+ let client = BackupClient::new(&config.server_url)?;
+
+ let genlist = client.list_generations()?;
+ let gen_id: String = match genlist.resolve(gen_ref) {
+ None => return Err(ObnamError::UnknownGeneration(gen_ref.to_string()).into()),
+ Some(id) => id,
+ };
+
+ let gen = client.fetch_generation(&gen_id, &dbname)?;
+ for (_, entry, reason) in gen.files()? {
+ println!("{}", format_entry(&entry, &reason));
+ }
+
+ // Delete the temporary file.
+ std::fs::remove_file(&dbname)?;
+
+ Ok(())
+}
+
+fn format_entry(e: &FilesystemEntry, reason: &str) -> String {
+ let kind = match e.kind() {
+ FilesystemKind::Regular => "-",
+ FilesystemKind::Directory => "d",
+ FilesystemKind::Symlink => "l",
+ };
+ format!("{} {} ({})", kind, e.pathbuf().display(), reason)
+}
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index ca5ff42..2919d88 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -1,8 +1,11 @@
mod backup;
-pub use backup::backup;
+pub use backup::{backup, Reason};
mod list;
pub use list::list;
+mod list_files;
+pub use list_files::list_files;
+
pub mod restore;
pub use restore::restore;
diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs
index 53e168a..0efdffb 100644
--- a/src/cmd/restore.rs
+++ b/src/cmd/restore.rs
@@ -35,10 +35,10 @@ pub fn restore(config: &ClientConfig, gen_ref: &str, to: &Path) -> anyhow::Resul
let gen = client.fetch_generation(&gen_id, &dbname)?;
info!("restore file count: {}", gen.file_count()?);
let progress = create_progress_bar(gen.file_count()?, true);
- for (fileid, entry) in gen.files()? {
+ for (fileid, entry, _) in gen.files()? {
restore_generation(&client, &gen, fileid, &entry, &to, &progress)?;
}
- for (_, entry) in gen.files()? {
+ for (_, entry, _) in gen.files()? {
if entry.is_dir() {
restore_directory_metadata(&entry, &to)?;
}
diff --git a/src/generation.rs b/src/generation.rs
index b55e244..c91702f 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -1,4 +1,5 @@
use crate::chunkid::ChunkId;
+use crate::cmd::Reason;
use crate::fsentry::FilesystemEntry;
use rusqlite::Connection;
use std::path::Path;
@@ -26,23 +27,28 @@ impl NascentGeneration {
self.fileno
}
- pub fn insert(&mut self, e: FilesystemEntry, ids: &[ChunkId]) -> anyhow::Result<()> {
+ pub fn insert(
+ &mut self,
+ e: FilesystemEntry,
+ ids: &[ChunkId],
+ reason: Reason,
+ ) -> anyhow::Result<()> {
let t = self.conn.transaction()?;
self.fileno += 1;
- sql::insert_one(&t, e, self.fileno, ids)?;
+ sql::insert_one(&t, e, self.fileno, ids, reason)?;
t.commit()?;
Ok(())
}
- pub fn insert_iter(
+ pub fn insert_iter<'a>(
&mut self,
- entries: impl Iterator<Item = anyhow::Result<(FilesystemEntry, Vec<ChunkId>)>>,
+ entries: impl Iterator<Item = anyhow::Result<(FilesystemEntry, Vec<ChunkId>, Reason)>>,
) -> anyhow::Result<()> {
let t = self.conn.transaction()?;
for r in entries {
- let (e, ids) = r?;
+ let (e, ids, reason) = r?;
self.fileno += 1;
- sql::insert_one(&t, e, self.fileno, &ids[..])?;
+ sql::insert_one(&t, e, self.fileno, &ids[..], reason)?;
}
t.commit()?;
Ok(())
@@ -114,7 +120,7 @@ impl LocalGeneration {
Ok(sql::file_count(&self.conn)?)
}
- pub fn files(&self) -> anyhow::Result<Vec<(i64, FilesystemEntry)>> {
+ pub fn files(&self) -> anyhow::Result<Vec<(i64, FilesystemEntry, String)>> {
Ok(sql::files(&self.conn)?)
}
@@ -133,6 +139,7 @@ impl LocalGeneration {
mod sql {
use crate::chunkid::ChunkId;
+ use crate::cmd::Reason;
use crate::error::ObnamError;
use crate::fsentry::FilesystemEntry;
use rusqlite::{params, Connection, OpenFlags, Row, Transaction};
@@ -143,7 +150,7 @@ mod sql {
let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE;
let conn = Connection::open_with_flags(filename, flags)?;
conn.execute(
- "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT)",
+ "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT)",
params![],
)?;
conn.execute(
@@ -168,11 +175,12 @@ mod sql {
e: FilesystemEntry,
fileno: i64,
ids: &[ChunkId],
+ reason: Reason,
) -> anyhow::Result<()> {
let json = serde_json::to_string(&e)?;
t.execute(
- "INSERT INTO files (fileno, filename, json) VALUES (?1, ?2, ?3)",
- params![fileno, path_into_blob(&e.pathbuf()), &json],
+ "INSERT INTO files (fileno, filename, json, reason) VALUES (?1, ?2, ?3, ?4)",
+ params![fileno, path_into_blob(&e.pathbuf()), &json, reason,],
)?;
for id in ids {
t.execute(
@@ -187,10 +195,11 @@ mod sql {
path.as_os_str().as_bytes().to_vec()
}
- pub fn row_to_entry(row: &Row) -> rusqlite::Result<(i64, String)> {
+ pub fn row_to_entry(row: &Row) -> rusqlite::Result<(i64, String, String)> {
let fileno: i64 = row.get(row.column_index("fileno")?)?;
let json: String = row.get(row.column_index("json")?)?;
- Ok((fileno, json))
+ let reason: String = row.get(row.column_index("reason")?)?;
+ Ok((fileno, json, reason))
}
pub fn file_count(conn: &Connection) -> anyhow::Result<i64> {
@@ -201,14 +210,14 @@ mod sql {
Ok(count)
}
- pub fn files(conn: &Connection) -> anyhow::Result<Vec<(i64, FilesystemEntry)>> {
+ pub fn files(conn: &Connection) -> anyhow::Result<Vec<(i64, FilesystemEntry, String)>> {
let mut stmt = conn.prepare("SELECT * FROM files")?;
let iter = stmt.query_map(params![], |row| row_to_entry(row))?;
- let mut files: Vec<(i64, FilesystemEntry)> = vec![];
+ let mut files: Vec<(i64, FilesystemEntry, String)> = vec![];
for x in iter {
- let (fileno, json) = x?;
+ let (fileno, json, reason) = x?;
let entry = serde_json::from_str(&json)?;
- files.push((fileno, entry));
+ files.push((fileno, entry, reason));
}
Ok(files)
}
@@ -228,31 +237,31 @@ mod sql {
pub fn get_file(conn: &Connection, filename: &Path) -> anyhow::Result<Option<FilesystemEntry>> {
match get_file_and_fileno(conn, filename)? {
None => Ok(None),
- Some((_, e)) => Ok(Some(e)),
+ Some((_, e, _)) => Ok(Some(e)),
}
}
pub fn get_fileno(conn: &Connection, filename: &Path) -> anyhow::Result<Option<i64>> {
match get_file_and_fileno(conn, filename)? {
None => Ok(None),
- Some((id, _)) => Ok(Some(id)),
+ Some((id, _, _)) => Ok(Some(id)),
}
}
fn get_file_and_fileno(
conn: &Connection,
filename: &Path,
- ) -> anyhow::Result<Option<(i64, FilesystemEntry)>> {
- let mut stmt = conn.prepare("SELECT fileno, json FROM files WHERE filename = ?1")?;
+ ) -> anyhow::Result<Option<(i64, FilesystemEntry, String)>> {
+ let mut stmt = conn.prepare("SELECT * FROM files WHERE filename = ?1")?;
let mut iter =
stmt.query_map(params![path_into_blob(filename)], |row| row_to_entry(row))?;
match iter.next() {
None => Ok(None),
Some(Err(e)) => Err(e.into()),
- Some(Ok((fileno, json))) => {
+ Some(Ok((fileno, json, reason))) => {
let entry = serde_json::from_str(&json)?;
if iter.next() == None {
- Ok(Some((fileno, entry)))
+ Ok(Some((fileno, entry, reason)))
} else {
Err(ObnamError::TooManyFiles(filename.to_path_buf()).into())
}
diff --git a/subplot/client.py b/subplot/client.py
index 0a09d31..c1f5159 100644
--- a/subplot/client.py
+++ b/subplot/client.py
@@ -68,3 +68,30 @@ def generation_list_contains(ctx, gen_id=None):
runcmd_stdout_contains = globals()["runcmd_stdout_contains"]
gen_id = ctx["vars"][gen_id]
runcmd_stdout_contains(ctx, text=gen_id)
+
+
+def file_was_new(ctx, filename=None):
+ assert_eq = globals()["assert_eq"]
+ reason = get_backup_reason(ctx, filename)
+ assert_eq(reason, "(new)")
+
+
+def file_was_changed(ctx, filename=None):
+ assert_eq = globals()["assert_eq"]
+ reason = get_backup_reason(ctx, filename)
+ assert_eq(reason, "(changed)")
+
+
+def file_was_unchanged(ctx, filename=None):
+ assert_eq = globals()["assert_eq"]
+ reason = get_backup_reason(ctx, filename)
+ assert_eq(reason, "(unchanged)")
+
+
+def get_backup_reason(ctx, filename):
+ runcmd_get_stdout = globals()["runcmd_get_stdout"]
+ stdout = runcmd_get_stdout(ctx)
+ lines = stdout.splitlines()
+ lines = [line for line in lines if filename in line]
+ line = lines[0]
+ return line.split()[-1]
diff --git a/subplot/client.yaml b/subplot/client.yaml
index e526304..db55679 100644
--- a/subplot/client.yaml
+++ b/subplot/client.yaml
@@ -15,3 +15,12 @@
- then: "generation list contains <{gen_id}>"
function: generation_list_contains
+
+- then: "file {filename} was backed up because it was new"
+ function: file_was_new
+
+- then: "file {filename} was backed up because it was changed"
+ function: file_was_changed
+
+- then: "file {filename} was not backed up because it was unchanged"
+ function: file_was_unchanged