From 6171b3c2eb8ec9a7734e8ce00c81999500b5fbe6 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 31 Dec 2020 15:32:51 +0200 Subject: feat: incremental backup This uses the previous, latest generation as a guideline to see what is new or changed. --- src/client.rs | 1 - src/cmd/backup.rs | 138 +++++++++++++++++++++++++++++++++++++++++++----------- src/error.rs | 4 ++ src/fsentry.rs | 2 +- src/generation.rs | 32 +++++++++++++ src/genlist.rs | 9 +--- 6 files changed, 150 insertions(+), 36 deletions(-) diff --git a/src/client.rs b/src/client.rs index f76d817..616ceef 100644 --- a/src/client.rs +++ b/src/client.rs @@ -195,7 +195,6 @@ impl BackupClient { debug!("list_generationgs: body={:?}", body); let map: HashMap = serde_yaml::from_slice(&body)?; debug!("list_generations: map={:?}", map); - eprintln!("list_generations: map={:?}", map); let finished = map .iter() .map(|(id, meta)| FinishedGeneration::new(id, meta.ended().map_or("", |s| s))) diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index c9b7fc5..2a8e086 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -1,61 +1,100 @@ use crate::client::{BackupClient, ClientConfig}; +use crate::fsentry::FilesystemEntry; use crate::fsiter::FsIterator; -use crate::generation::NascentGeneration; +use crate::generation::{LocalGeneration, NascentGeneration}; use indicatif::{ProgressBar, ProgressStyle}; -use log::info; +use log::{debug, info}; use tempfile::NamedTempFile; -const GUESS_FILE_COUNT: u64 = 0; - pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { let client = BackupClient::new(&config.server_url)?; // Create a named temporary file. We don't meed the open file // handle, so we discard that. - let dbname = { + let oldname = { let temp = NamedTempFile::new()?; let (_, dbname) = temp.keep()?; dbname }; + // Create a named temporary file. We don't meed the open file + // handle, so we discard that. + let newname = { + let temp = NamedTempFile::new()?; + let (_, dbname) = temp.keep()?; + dbname + }; + + let genlist = client.list_generations()?; { - // Create the SQLite database using the named temporary file. - // The fetching is in its own block so that the file handles - // get closed and data flushed to disk. - let mut gen = NascentGeneration::create(&dbname)?; - let progress = create_progress_bar(GUESS_FILE_COUNT, true); + let iter = FsIterator::new(&config.root); + let mut new = NascentGeneration::create(&newname)?; + let progress = create_progress_bar(true); progress.enable_steady_tick(100); - gen.insert_iter(FsIterator::new(&config.root).map(|entry| { - progress.inc(1); - match entry { - Err(err) => Err(err), - Ok(entry) => { - let path = &entry.pathbuf(); - info!("backup: {}", path.display()); - progress.set_message(&format!("{}", path.display())); - client.upload_filesystem_entry(entry, buffer_size) - } + + match genlist.resolve("latest") { + None => { + info!("fresh backup without a previous generation"); + new.insert_iter(iter.map(|entry| { + progress.inc(1); + match entry { + Err(err) => Err(err), + Ok(entry) => { + let path = &entry.pathbuf(); + info!("backup: {}", path.display()); + progress.set_message(&format!("{}", path.display())); + client.upload_filesystem_entry(entry, buffer_size) + } + } + }))?; } - }))?; - progress.set_length(gen.file_count()); + Some(old) => { + info!("incremental backup based on {}", old); + let old = client.fetch_generation(&old, &oldname)?; + progress.set_length(old.file_count()?.into()); + new.insert_iter(iter.map(|entry| { + progress.inc(1); + match entry { + Err(err) => Err(err), + Ok(entry) => { + let path = &entry.pathbuf(); + info!("backup: {}", path.display()); + progress.set_message(&format!("{}", path.display())); + if needs_backup(&old, &entry) { + client.upload_filesystem_entry(entry, buffer_size) + } else { + let fileno = old.get_fileno(&entry.pathbuf())?; + let ids = if let Some(fileno) = fileno { + old.chunkids(fileno)? + } else { + vec![] + }; + Ok((entry.clone(), ids)) + } + } + } + }))?; + } + } + progress.set_length(new.file_count()); progress.finish(); - println!("file count: {}", gen.file_count()); } // Upload the SQLite file, i.e., the named temporary file, which // still exists, since we persisted it above. - let gen_id = client.upload_generation(&dbname, buffer_size)?; + let gen_id = client.upload_generation(&newname, buffer_size)?; println!("gen id: {}", gen_id); // Delete the temporary file. - std::fs::remove_file(&dbname)?; + std::fs::remove_file(&newname)?; + std::fs::remove_file(&oldname)?; Ok(()) } -fn create_progress_bar(file_count: u64, verbose: bool) -> ProgressBar { +fn create_progress_bar(verbose: bool) -> ProgressBar { let progress = if verbose { - ProgressBar::new(file_count) + ProgressBar::new(0) } else { ProgressBar::hidden() }; @@ -69,3 +108,48 @@ fn create_progress_bar(file_count: u64, verbose: bool) -> ProgressBar { progress.set_style(ProgressStyle::default_bar().template(&parts.join("\n"))); progress } + +fn needs_backup(old: &LocalGeneration, new_entry: &FilesystemEntry) -> bool { + let new_name = new_entry.pathbuf(); + match old.get_file(&new_name) { + // File is not in old generation. + Ok(None) => { + debug!( + "needs_backup: file is not in old generation, needs backup: {:?}", + new_name + ); + true + } + + // File is in old generation. Has its metadata changed? + Ok(Some(old_entry)) => { + if file_has_changed(&old_entry, new_entry) { + debug!("needs_backup: file has changed: {:?}", new_name); + true + } else { + debug!("needs_backup: file has NOT changed: {:?}", new_name); + false + } + } + + // There was an error, which we ignore, but we indicate the + // file needs to be backed up now. + Err(err) => { + debug!( + "needs_backup: lookup in old generation returned error, ignored: {:?}: {}", + new_name, err + ); + true + } + } +} + +fn file_has_changed(old: &FilesystemEntry, new: &FilesystemEntry) -> bool { + let unchanged = old.kind() == new.kind() + && old.len() == new.len() + && old.mode() == new.mode() + && old.mtime() == new.mtime() + && old.mtime_ns() == new.mtime_ns() + && old.symlink_target() == new.symlink_target(); + !unchanged +} diff --git a/src/error.rs b/src/error.rs index 1e750e5..3b3f573 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,4 @@ +use std::path::PathBuf; use thiserror::Error; /// Define all the kinds of errors any part of this crate can return. @@ -5,4 +6,7 @@ use thiserror::Error; pub enum ObnamError { #[error("Can't find backup '{0}'")] UnknownGeneration(String), + + #[error("Generation has more than one file with the name {0}")] + TooManyFiles(PathBuf), } diff --git a/src/fsentry.rs b/src/fsentry.rs index 471b1bd..eae11b4 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -15,7 +15,7 @@ use std::path::{Path, PathBuf}; /// /// This is everything Obnam cares about each file system object, when /// making a backup. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct FilesystemEntry { kind: FilesystemKind, path: Vec, diff --git a/src/generation.rs b/src/generation.rs index 4e7589c..7a4b71b 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -1,4 +1,5 @@ use crate::chunkid::ChunkId; +use crate::error::ObnamError; use crate::fsentry::FilesystemEntry; use rusqlite::{params, Connection, OpenFlags, Row, Transaction}; use std::path::Path; @@ -184,4 +185,35 @@ impl LocalGeneration { } Ok(ids) } + + pub fn get_file(&self, filename: &Path) -> anyhow::Result> { + match self.get_file_and_fileno(filename)? { + None => Ok(None), + Some((_, e)) => Ok(Some(e)), + } + } + + pub fn get_fileno(&self, filename: &Path) -> anyhow::Result> { + match self.get_file_and_fileno(filename)? { + None => Ok(None), + Some((id, _)) => Ok(Some(id)), + } + } + + fn get_file_and_fileno( + &self, + filename: &Path, + ) -> anyhow::Result> { + let files = self.files()?; + let files: Vec<(u64, FilesystemEntry)> = files + .iter() + .filter(|(_, e)| e.pathbuf() == filename) + .map(|(id, e)| (*id, e.clone())) + .collect(); + match files.len() { + 0 => Ok(None), + 1 => Ok(Some((files[0].0, files[0].1.clone()))), + _ => return Err(ObnamError::TooManyFiles(filename.to_path_buf()).into()), + } + } } diff --git a/src/genlist.rs b/src/genlist.rs index 9c511f2..10c614e 100644 --- a/src/genlist.rs +++ b/src/genlist.rs @@ -18,11 +18,9 @@ impl GenerationList { pub fn resolve(&self, genref: &str) -> Option { let gen = if self.list.is_empty() { - eprintln!("genlist: empty"); None } else if genref == "latest" { let i = self.list.len() - 1; - eprintln!("genlist: latest={} of {}", i, self.list.len()); Some(self.list[i].clone()) } else { let genref: ChunkId = genref.parse().unwrap(); @@ -31,18 +29,15 @@ impl GenerationList { .filter(|gen| gen.id() == genref) .map(|gen| gen.clone()) .collect(); - eprintln!("genlist: hits={}", hits.len()); if hits.len() == 1 { Some(hits[0].clone()) } else { None } }; - let ret = match gen { + match gen { None => None, Some(gen) => Some(gen.id().to_string()), - }; - eprintln!("genlist: return {:?}", ret); - ret + } } } -- cgit v1.2.1