summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2020-12-31 15:32:51 +0200
committerLars Wirzenius <liw@liw.fi>2020-12-31 17:25:42 +0200
commit6171b3c2eb8ec9a7734e8ce00c81999500b5fbe6 (patch)
tree9388040e96f3a42679860f70c4540aedceff329d
parent85d0ce236677c35c85d76bfc345e36d29f585390 (diff)
downloadobnam2-6171b3c2eb8ec9a7734e8ce00c81999500b5fbe6.tar.gz
feat: incremental backup
This uses the previous, latest generation as a guideline to see what is new or changed.
-rw-r--r--src/client.rs1
-rw-r--r--src/cmd/backup.rs138
-rw-r--r--src/error.rs4
-rw-r--r--src/fsentry.rs2
-rw-r--r--src/generation.rs32
-rw-r--r--src/genlist.rs9
6 files changed, 150 insertions, 36 deletions
diff --git a/src/client.rs b/src/client.rs
index f76d817..616ceef 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -195,7 +195,6 @@ impl BackupClient {
debug!("list_generationgs: body={:?}", body);
let map: HashMap<String, ChunkMeta> = serde_yaml::from_slice(&body)?;
debug!("list_generations: map={:?}", map);
- eprintln!("list_generations: map={:?}", map);
let finished = map
.iter()
.map(|(id, meta)| FinishedGeneration::new(id, meta.ended().map_or("", |s| s)))
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index c9b7fc5..2a8e086 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -1,61 +1,100 @@
use crate::client::{BackupClient, ClientConfig};
+use crate::fsentry::FilesystemEntry;
use crate::fsiter::FsIterator;
-use crate::generation::NascentGeneration;
+use crate::generation::{LocalGeneration, NascentGeneration};
use indicatif::{ProgressBar, ProgressStyle};
-use log::info;
+use log::{debug, info};
use tempfile::NamedTempFile;
-const GUESS_FILE_COUNT: u64 = 0;
-
pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> {
let client = BackupClient::new(&config.server_url)?;
    // Create a named temporary file. We don't need the open file
// handle, so we discard that.
- let dbname = {
+ let oldname = {
let temp = NamedTempFile::new()?;
let (_, dbname) = temp.keep()?;
dbname
};
+    // Create a named temporary file. We don't need the open file
+ // handle, so we discard that.
+ let newname = {
+ let temp = NamedTempFile::new()?;
+ let (_, dbname) = temp.keep()?;
+ dbname
+ };
+
+ let genlist = client.list_generations()?;
{
- // Create the SQLite database using the named temporary file.
- // The fetching is in its own block so that the file handles
- // get closed and data flushed to disk.
- let mut gen = NascentGeneration::create(&dbname)?;
- let progress = create_progress_bar(GUESS_FILE_COUNT, true);
+ let iter = FsIterator::new(&config.root);
+ let mut new = NascentGeneration::create(&newname)?;
+ let progress = create_progress_bar(true);
progress.enable_steady_tick(100);
- gen.insert_iter(FsIterator::new(&config.root).map(|entry| {
- progress.inc(1);
- match entry {
- Err(err) => Err(err),
- Ok(entry) => {
- let path = &entry.pathbuf();
- info!("backup: {}", path.display());
- progress.set_message(&format!("{}", path.display()));
- client.upload_filesystem_entry(entry, buffer_size)
- }
+
+ match genlist.resolve("latest") {
+ None => {
+ info!("fresh backup without a previous generation");
+ new.insert_iter(iter.map(|entry| {
+ progress.inc(1);
+ match entry {
+ Err(err) => Err(err),
+ Ok(entry) => {
+ let path = &entry.pathbuf();
+ info!("backup: {}", path.display());
+ progress.set_message(&format!("{}", path.display()));
+ client.upload_filesystem_entry(entry, buffer_size)
+ }
+ }
+ }))?;
}
- }))?;
- progress.set_length(gen.file_count());
+ Some(old) => {
+ info!("incremental backup based on {}", old);
+ let old = client.fetch_generation(&old, &oldname)?;
+ progress.set_length(old.file_count()?.into());
+ new.insert_iter(iter.map(|entry| {
+ progress.inc(1);
+ match entry {
+ Err(err) => Err(err),
+ Ok(entry) => {
+ let path = &entry.pathbuf();
+ info!("backup: {}", path.display());
+ progress.set_message(&format!("{}", path.display()));
+ if needs_backup(&old, &entry) {
+ client.upload_filesystem_entry(entry, buffer_size)
+ } else {
+ let fileno = old.get_fileno(&entry.pathbuf())?;
+ let ids = if let Some(fileno) = fileno {
+ old.chunkids(fileno)?
+ } else {
+ vec![]
+ };
+ Ok((entry.clone(), ids))
+ }
+ }
+ }
+ }))?;
+ }
+ }
+ progress.set_length(new.file_count());
progress.finish();
- println!("file count: {}", gen.file_count());
}
// Upload the SQLite file, i.e., the named temporary file, which
// still exists, since we persisted it above.
- let gen_id = client.upload_generation(&dbname, buffer_size)?;
+ let gen_id = client.upload_generation(&newname, buffer_size)?;
println!("gen id: {}", gen_id);
// Delete the temporary file.
- std::fs::remove_file(&dbname)?;
+ std::fs::remove_file(&newname)?;
+ std::fs::remove_file(&oldname)?;
Ok(())
}
-fn create_progress_bar(file_count: u64, verbose: bool) -> ProgressBar {
+fn create_progress_bar(verbose: bool) -> ProgressBar {
let progress = if verbose {
- ProgressBar::new(file_count)
+ ProgressBar::new(0)
} else {
ProgressBar::hidden()
};
@@ -69,3 +108,48 @@ fn create_progress_bar(file_count: u64, verbose: bool) -> ProgressBar {
progress.set_style(ProgressStyle::default_bar().template(&parts.join("\n")));
progress
}
+
+fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool {
+ let new_name = new_entry.pathbuf();
+ match old.get_file(&new_name) {
+ // File is not in old generation.
+ Ok(None) => {
+ debug!(
+ "needs_backup: file is not in old generation, needs backup: {:?}",
+ new_name
+ );
+ true
+ }
+
+ // File is in old generation. Has its metadata changed?
+ Ok(Some(old_entry)) => {
+ if file_has_changed(&old_entry, new_entry) {
+ debug!("needs_backup: file has changed: {:?}", new_name);
+ true
+ } else {
+ debug!("needs_backup: file has NOT changed: {:?}", new_name);
+ false
+ }
+ }
+
+ // There was an error, which we ignore, but we indicate the
+ // file needs to be backed up now.
+ Err(err) => {
+ debug!(
+ "needs_backup: lookup in old generation returned error, ignored: {:?}: {}",
+ new_name, err
+ );
+ true
+ }
+ }
+}
+
+fn file_has_changed(old: &FilesystemEntry, new: &FilesystemEntry) -> bool {
+ let unchanged = old.kind() == new.kind()
+ && old.len() == new.len()
+ && old.mode() == new.mode()
+ && old.mtime() == new.mtime()
+ && old.mtime_ns() == new.mtime_ns()
+ && old.symlink_target() == new.symlink_target();
+ !unchanged
+}
diff --git a/src/error.rs b/src/error.rs
index 1e750e5..3b3f573 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,3 +1,4 @@
+use std::path::PathBuf;
use thiserror::Error;
/// Define all the kinds of errors any part of this crate can return.
@@ -5,4 +6,7 @@ use thiserror::Error;
pub enum ObnamError {
#[error("Can't find backup '{0}'")]
UnknownGeneration(String),
+
+ #[error("Generation has more than one file with the name {0}")]
+ TooManyFiles(PathBuf),
}
diff --git a/src/fsentry.rs b/src/fsentry.rs
index 471b1bd..eae11b4 100644
--- a/src/fsentry.rs
+++ b/src/fsentry.rs
@@ -15,7 +15,7 @@ use std::path::{Path, PathBuf};
///
/// This is everything Obnam cares about each file system object, when
/// making a backup.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilesystemEntry {
kind: FilesystemKind,
path: Vec<u8>,
diff --git a/src/generation.rs b/src/generation.rs
index 4e7589c..7a4b71b 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -1,4 +1,5 @@
use crate::chunkid::ChunkId;
+use crate::error::ObnamError;
use crate::fsentry::FilesystemEntry;
use rusqlite::{params, Connection, OpenFlags, Row, Transaction};
use std::path::Path;
@@ -184,4 +185,35 @@ impl LocalGeneration {
}
Ok(ids)
}
+
+ pub fn get_file(&self, filename: &Path) -> anyhow::Result<Option<FilesystemEntry>> {
+ match self.get_file_and_fileno(filename)? {
+ None => Ok(None),
+ Some((_, e)) => Ok(Some(e)),
+ }
+ }
+
+ pub fn get_fileno(&self, filename: &Path) -> anyhow::Result<Option<u64>> {
+ match self.get_file_and_fileno(filename)? {
+ None => Ok(None),
+ Some((id, _)) => Ok(Some(id)),
+ }
+ }
+
+ fn get_file_and_fileno(
+ &self,
+ filename: &Path,
+ ) -> anyhow::Result<Option<(u64, FilesystemEntry)>> {
+ let files = self.files()?;
+ let files: Vec<(u64, FilesystemEntry)> = files
+ .iter()
+ .filter(|(_, e)| e.pathbuf() == filename)
+ .map(|(id, e)| (*id, e.clone()))
+ .collect();
+ match files.len() {
+ 0 => Ok(None),
+ 1 => Ok(Some((files[0].0, files[0].1.clone()))),
+ _ => return Err(ObnamError::TooManyFiles(filename.to_path_buf()).into()),
+ }
+ }
}
diff --git a/src/genlist.rs b/src/genlist.rs
index 9c511f2..10c614e 100644
--- a/src/genlist.rs
+++ b/src/genlist.rs
@@ -18,11 +18,9 @@ impl GenerationList {
pub fn resolve(&self, genref: &str) -> Option<String> {
let gen = if self.list.is_empty() {
- eprintln!("genlist: empty");
None
} else if genref == "latest" {
let i = self.list.len() - 1;
- eprintln!("genlist: latest={} of {}", i, self.list.len());
Some(self.list[i].clone())
} else {
let genref: ChunkId = genref.parse().unwrap();
@@ -31,18 +29,15 @@ impl GenerationList {
.filter(|gen| gen.id() == genref)
.map(|gen| gen.clone())
.collect();
- eprintln!("genlist: hits={}", hits.len());
if hits.len() == 1 {
Some(hits[0].clone())
} else {
None
}
};
- let ret = match gen {
+ match gen {
None => None,
Some(gen) => Some(gen.id().to_string()),
- };
- eprintln!("genlist: return {:?}", ret);
- ret
+ }
}
}