summaryrefslogtreecommitdiff
path: root/src/backup_run.rs
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2022-04-09 12:00:29 +0300
committerLars Wirzenius <liw@liw.fi>2022-04-16 09:06:59 +0300
commit18c0f4afab29e17c050208234becbfb5e2973746 (patch)
tree62bb67504c47747f8ce202f4eb4121bb3d051223 /src/backup_run.rs
parent82ff782fe85c84c10f1f18c9bd5c2b017bc2f240 (diff)
downloadobnam2-18c0f4afab29e17c050208234becbfb5e2973746.tar.gz
feat: use one checksum for all chunks in a backup
When making a backup, use the same checksum for any chunks it re-uses or creates. This is for performance: if we allowed two checksums to be used, we would have to compute the checksum for a chunk twice, and potentially look up both on the server. This is just a lot of work. Instead, we use only one. The trade-off here is that when (not if) the user wants to switch to a new checksum type, they'll have to do a full backup, uploading all their data to the server, even when it's already there, just with a different checksum. Hopefully this will be rare. Full backups always use the built-in, hardcoded default checksum, and incremental backups use whatever the previous backup used. The default is still SHA256, but this commit adds code to support BLAKE2 if we decide to switch to that as a default. It's also easy to add support for others, now. BLAKE2 was added to verify that Obnam can actually handle the checksum changing (manual test: not in the test suite). I don't think users need to be offered even the option of choosing a checksum algorithm to use. When one cares about both security and performance, choosing a checksum requires specialist, expert knowledge. Obnam developers should choose the default. Giving users a knob they can twiddle just makes it that much harder to configure and use Obnam. If the choice Obnam developers have made is shown to be sub-optimal, it seems better to change the default for everyone, rather than hope that every user changes their configuration to gain the benefit. Experience has shown that people mostly don't change the default configuration, and that they are especially bad at choosing well when security is a concern. (Obnam is free software. Expert users can choose their checksum by changing the source code. I'm not fundamentally limiting anyone's freedom or choice here.) Users can switch to a new default algorithm by triggering a full backup with the new "obnam backup --full". Sponsored-by: author
Diffstat (limited to 'src/backup_run.rs')
-rw-r--r--src/backup_run.rs22
1 files changed, 19 insertions, 3 deletions
diff --git a/src/backup_run.rs b/src/backup_run.rs
index 29e82fc..2418871 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -15,6 +15,7 @@ use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator};
use crate::generation::{
GenId, LocalGeneration, LocalGenerationError, NascentError, NascentGeneration,
};
+use crate::label::LabelChecksumKind;
use crate::performance::{Clock, Performance};
use crate::policy::BackupPolicy;
use crate::schema::SchemaVersion;
@@ -24,10 +25,12 @@ use chrono::{DateTime, Local};
use log::{debug, error, info, warn};
use std::path::{Path, PathBuf};
+const DEFAULT_CHECKSUM_KIND: LabelChecksumKind = LabelChecksumKind::Sha256;
const SQLITE_CHUNK_SIZE: usize = MIB as usize;
/// A running backup.
pub struct BackupRun<'a> {
+ checksum_kind: Option<LabelChecksumKind>,
client: &'a BackupClient,
policy: BackupPolicy,
buffer_size: usize,
@@ -105,6 +108,7 @@ impl<'a> BackupRun<'a> {
/// Create a new run for an initial backup.
pub fn initial(config: &ClientConfig, client: &'a BackupClient) -> Result<Self, BackupError> {
Ok(Self {
+ checksum_kind: Some(DEFAULT_CHECKSUM_KIND),
client,
policy: BackupPolicy::default(),
buffer_size: config.chunk_size,
@@ -118,6 +122,7 @@ impl<'a> BackupRun<'a> {
client: &'a BackupClient,
) -> Result<Self, BackupError> {
Ok(Self {
+ checksum_kind: None,
client,
policy: BackupPolicy::default(),
buffer_size: config.chunk_size,
@@ -136,7 +141,7 @@ impl<'a> BackupRun<'a> {
None => {
// Create a new, empty generation.
let schema = schema_version(DEFAULT_SCHEMA_MAJOR).unwrap();
- NascentGeneration::create(oldname, schema)?.close()?;
+ NascentGeneration::create(oldname, schema, self.checksum_kind.unwrap())?.close()?;
// Open the newly created empty generation.
Ok(LocalGeneration::open(oldname)?)
@@ -146,6 +151,11 @@ impl<'a> BackupRun<'a> {
let old = self.fetch_previous_generation(genid, oldname).await?;
perf.stop(Clock::GenerationDownload);
+ let meta = old.meta()?;
+ if let Some(v) = meta.get("checksum_kind") {
+ self.checksum_kind = Some(LabelChecksumKind::from(v)?);
+ }
+
let progress = BackupProgress::incremental();
progress.files_in_previous_generation(old.file_count()? as u64);
self.progress = Some(progress);
@@ -155,6 +165,12 @@ impl<'a> BackupRun<'a> {
}
}
+ /// Return this run's checksum kind, or SHA256 when none has been set.
+ fn checksum_kind(&self) -> LabelChecksumKind {
+ let fallback = LabelChecksumKind::Sha256;
+ self.checksum_kind.unwrap_or(fallback)
+ }
+
async fn fetch_previous_generation(
&self,
genid: &GenId,
@@ -185,7 +201,7 @@ impl<'a> BackupRun<'a> {
let mut warnings: Vec<BackupError> = vec![];
let mut new_cachedir_tags = vec![];
let files_count = {
- let mut new = NascentGeneration::create(newpath, schema)?;
+ let mut new = NascentGeneration::create(newpath, schema, self.checksum_kind.unwrap())?;
for root in &config.roots {
match self.backup_one_root(config, old, &mut new, root).await {
Ok(mut o) => {
@@ -378,7 +394,7 @@ impl<'a> BackupRun<'a> {
let mut chunk_ids = vec![];
let file = std::fs::File::open(filename)
.map_err(|err| ClientError::FileOpen(filename.to_path_buf(), err))?;
- let chunker = FileChunks::new(size, file, filename);
+ let chunker = FileChunks::new(size, file, filename, self.checksum_kind());
for item in chunker {
let chunk = item?;
if let Some(chunk_id) = self.client.has_chunk(chunk.meta()).await? {