diff options
-rw-r--r-- | Cargo.lock | 10 | ||||
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | obnam.md | 24 | ||||
-rw-r--r-- | src/backup_run.rs | 22 | ||||
-rw-r--r-- | src/checksummer.rs | 41 | ||||
-rw-r--r-- | src/chunk.rs | 6 | ||||
-rw-r--r-- | src/chunker.rs | 16 | ||||
-rw-r--r-- | src/chunkid.rs | 6 | ||||
-rw-r--r-- | src/chunkmeta.rs | 20 | ||||
-rw-r--r-- | src/cipher.rs | 6 | ||||
-rw-r--r-- | src/client.rs | 4 | ||||
-rw-r--r-- | src/cmd/backup.rs | 52 | ||||
-rw-r--r-- | src/dbgen.rs | 40 | ||||
-rw-r--r-- | src/error.rs | 5 | ||||
-rw-r--r-- | src/generation.rs | 17 | ||||
-rw-r--r-- | src/genmeta.rs | 5 | ||||
-rw-r--r-- | src/index.rs | 18 | ||||
-rw-r--r-- | src/label.rs | 138 | ||||
-rw-r--r-- | src/lib.rs | 2 | ||||
-rw-r--r-- | src/server.rs | 4 |
20 files changed, 313 insertions, 124 deletions
@@ -114,6 +114,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] +name = "blake2" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" +dependencies = [ + "digest 0.10.3", +] + +[[package]] name = "block-buffer" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -977,6 +986,7 @@ version = "0.7.1" dependencies = [ "aes-gcm", "anyhow", + "blake2", "bytesize", "chrono", "directories-next", @@ -13,6 +13,7 @@ rust-version = "1.56.0" [dependencies] aes-gcm = "0.9" anyhow = "1" +blake2 = "0.10.4" bytesize = "1" chrono = "0.4" directories-next = "2" @@ -1092,7 +1092,7 @@ storage of backed up data. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"} +when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1105,17 +1105,17 @@ We must be able to retrieve it. when I GET /v1/chunks/<ID> then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc"} +and chunk-meta is {"label":"0abc"} and the body matches file data.dat ~~~ We must also be able to find it based on metadata. ~~~scenario -when I GET /v1/chunks?label=abc +when I GET /v1/chunks?label=0abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"<ID>":{"label":"abc"}} +and the JSON body matches {"<ID>":{"label":"0abc"}} ~~~ Finally, we must be able to delete it. 
After that, we must not be able @@ -1128,7 +1128,7 @@ then HTTP status code is 200 when I GET /v1/chunks/<ID> then HTTP status code is 404 -when I GET /v1/chunks?label=abc +when I GET /v1/chunks?label=0abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1151,7 +1151,7 @@ We must get an empty result if searching for chunks that don't exist. ~~~scenario given a working Obnam system -when I GET /v1/chunks?label=abc +when I GET /v1/chunks?label=0abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1178,7 +1178,7 @@ First, create a chunk. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"} +when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1194,10 +1194,10 @@ given a running chunk server Can we still find it by its metadata? ~~~scenario -when I GET /v1/chunks?label=abc +when I GET /v1/chunks?label=0abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"<ID>":{"label":"abc"}} +and the JSON body matches {"<ID>":{"label":"0abc"}} ~~~ Can we still retrieve it by its identifier? @@ -1206,7 +1206,7 @@ Can we still retrieve it by its identifier? 
when I GET /v1/chunks/<ID> then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc"} +and chunk-meta is {"label":"0abc"} and the body matches file data.dat ~~~ @@ -1901,7 +1901,9 @@ then stdout, as JSON, has all the values in file geninfo.json "major": 0, "minor": 0 }, - "extras": {} + "extras": { + "checksum_kind": "sha256" + } } ~~~ diff --git a/src/backup_run.rs b/src/backup_run.rs index 29e82fc..2418871 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -15,6 +15,7 @@ use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator}; use crate::generation::{ GenId, LocalGeneration, LocalGenerationError, NascentError, NascentGeneration, }; +use crate::label::LabelChecksumKind; use crate::performance::{Clock, Performance}; use crate::policy::BackupPolicy; use crate::schema::SchemaVersion; @@ -24,10 +25,12 @@ use chrono::{DateTime, Local}; use log::{debug, error, info, warn}; use std::path::{Path, PathBuf}; +const DEFAULT_CHECKSUM_KIND: LabelChecksumKind = LabelChecksumKind::Sha256; const SQLITE_CHUNK_SIZE: usize = MIB as usize; /// A running backup. pub struct BackupRun<'a> { + checksum_kind: Option<LabelChecksumKind>, client: &'a BackupClient, policy: BackupPolicy, buffer_size: usize, @@ -105,6 +108,7 @@ impl<'a> BackupRun<'a> { /// Create a new run for an initial backup. pub fn initial(config: &ClientConfig, client: &'a BackupClient) -> Result<Self, BackupError> { Ok(Self { + checksum_kind: Some(DEFAULT_CHECKSUM_KIND), client, policy: BackupPolicy::default(), buffer_size: config.chunk_size, @@ -118,6 +122,7 @@ impl<'a> BackupRun<'a> { client: &'a BackupClient, ) -> Result<Self, BackupError> { Ok(Self { + checksum_kind: None, client, policy: BackupPolicy::default(), buffer_size: config.chunk_size, @@ -136,7 +141,7 @@ impl<'a> BackupRun<'a> { None => { // Create a new, empty generation. 
let schema = schema_version(DEFAULT_SCHEMA_MAJOR).unwrap(); - NascentGeneration::create(oldname, schema)?.close()?; + NascentGeneration::create(oldname, schema, self.checksum_kind())?.close()?; // Open the newly created empty generation. Ok(LocalGeneration::open(oldname)?) @@ -146,6 +151,11 @@ let old = self.fetch_previous_generation(genid, oldname).await?; perf.stop(Clock::GenerationDownload); + let meta = old.meta()?; + if let Some(v) = meta.get("checksum_kind") { + self.checksum_kind = Some(LabelChecksumKind::from(v)?); + } + let progress = BackupProgress::incremental(); progress.files_in_previous_generation(old.file_count()? as u64); self.progress = Some(progress); @@ -155,6 +165,12 @@ } } + fn checksum_kind(&self) -> LabelChecksumKind { + self.checksum_kind + .or(Some(LabelChecksumKind::Sha256)) + .unwrap() + } + async fn fetch_previous_generation( &self, genid: &GenId, @@ -185,7 +201,7 @@ let mut warnings: Vec<BackupError> = vec![]; let mut new_cachedir_tags = vec![]; let files_count = { - let mut new = NascentGeneration::create(newpath, schema)?; + let mut new = NascentGeneration::create(newpath, schema, self.checksum_kind())?; for root in &config.roots { match self.backup_one_root(config, old, &mut new, root).await { Ok(mut o) => { @@ -378,7 +394,7 @@ let mut chunk_ids = vec![]; let file = std::fs::File::open(filename) .map_err(|err| ClientError::FileOpen(filename.to_path_buf(), err))?; - let chunker = FileChunks::new(size, file, filename, self.checksum_kind()); for item in chunker { let chunk = item?; if let Some(chunk_id) = self.client.has_chunk(chunk.meta()).await? { diff --git a/src/checksummer.rs b/src/checksummer.rs deleted file mode 100644 index 50bce04..0000000 --- a/src/checksummer.rs +++ /dev/null @@ -1,41 +0,0 @@ -//! Compute checksums of data. -//! -//! 
De-duplication of backed up data in Obnam relies on cryptographic -//! checksums. They are implemented in this module. Note that Obnam -//! does not aim to make these algorithms configurable, so only a very -//! small number of carefully chosen algorithms are supported here. - -use sha2::{Digest, Sha256}; -use std::fmt; - -/// A checksum of some data. -#[derive(Debug, Clone)] -pub enum Checksum { - /// A SHA256 checksum. - Sha256(String), -} - -impl Checksum { - /// Compute a SHA256 checksum for a block of data. - pub fn sha256(data: &[u8]) -> Self { - let mut hasher = Sha256::new(); - hasher.update(data); - let hash = hasher.finalize(); - Self::Sha256(format!("{:x}", hash)) - } - - /// Create a `Checksum` from a known, previously computed hash. - pub fn sha256_from_str_unchecked(hash: &str) -> Self { - Self::Sha256(hash.to_string()) - } -} - -impl fmt::Display for Checksum { - /// Format a checksum for display. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let hash = match self { - Self::Sha256(hash) => hash, - }; - write!(f, "{}", hash) - } -} diff --git a/src/chunk.rs b/src/chunk.rs index 27a3ab9..4f604b9 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,8 +1,8 @@ //! Chunks of data. 
-use crate::checksummer::Checksum; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; +use crate::label::Label; use serde::{Deserialize, Serialize}; use std::default::Default; @@ -97,7 +97,7 @@ impl GenerationChunk { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); - let checksum = Checksum::sha256(&bytes); + let checksum = Label::sha256(&bytes); let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } @@ -185,7 +185,7 @@ impl ClientTrust { pub fn to_data_chunk(&self) -> Result<DataChunk, ClientTrustError> { let json: String = serde_json::to_string(self).map_err(ClientTrustError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); - let checksum = Checksum::sha256_from_str_unchecked("client-trust"); + let checksum = Label::literal("client-trust"); let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } diff --git a/src/chunker.rs b/src/chunker.rs index 7954621..29f8a90 100644 --- a/src/chunker.rs +++ b/src/chunker.rs @@ -1,14 +1,15 @@ //! Split file data into chunks. -use crate::checksummer::Checksum; use crate::chunk::DataChunk; use crate::chunkmeta::ChunkMeta; +use crate::label::{Label, LabelChecksumKind}; use std::io::prelude::*; use std::path::{Path, PathBuf}; /// Iterator over chunks in a file. pub struct FileChunks { chunk_size: usize, + kind: LabelChecksumKind, buf: Vec<u8>, filename: PathBuf, handle: std::fs::File, @@ -24,11 +25,17 @@ pub enum ChunkerError { impl FileChunks { /// Create new iterator. 
- pub fn new(chunk_size: usize, handle: std::fs::File, filename: &Path) -> Self { + pub fn new( + chunk_size: usize, + handle: std::fs::File, + filename: &Path, + kind: LabelChecksumKind, + ) -> Self { let mut buf = vec![]; buf.resize(chunk_size, 0); Self { chunk_size, + kind, buf, handle, filename: filename.to_path_buf(), @@ -54,7 +61,10 @@ impl FileChunks { } let buffer = &self.buf.as_slice()[..used]; - let hash = Checksum::sha256(buffer); + let hash = match self.kind { + LabelChecksumKind::Blake2 => Label::blake2(buffer), + LabelChecksumKind::Sha256 => Label::sha256(buffer), + }; let meta = ChunkMeta::new(&hash); let chunk = DataChunk::new(buffer.to_vec(), meta); Ok(Some(chunk)) diff --git a/src/chunkid.rs b/src/chunkid.rs index 3534627..50fc3d3 100644 --- a/src/chunkid.rs +++ b/src/chunkid.rs @@ -3,7 +3,7 @@ //! Chunk identifiers are chosen by the server. Each chunk has a //! unique identifier, which isn't based on the contents of the chunk. -use crate::checksummer::Checksum; +use crate::label::Label; use rusqlite::types::ToSqlOutput; use rusqlite::ToSql; use serde::{Deserialize, Serialize}; @@ -53,8 +53,8 @@ impl ChunkId { } /// Return the SHA256 checksum of the identifier. - pub fn sha256(&self) -> Checksum { - Checksum::sha256(self.id.as_bytes()) + pub fn sha256(&self) -> Label { + Label::sha256(self.id.as_bytes()) } } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 33c1070..fe7ef4c 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -1,6 +1,6 @@ //! Metadata about a chunk. -use crate::checksummer::Checksum; +use crate::label::Label; use serde::{Deserialize, Serialize}; use std::default::Default; use std::str::FromStr; @@ -37,9 +37,9 @@ impl ChunkMeta { /// Create a new data chunk. /// /// Data chunks are not for generations. 
- pub fn new(checksum: &Checksum) -> Self { + pub fn new(label: &Label) -> Self { ChunkMeta { - label: checksum.to_string(), + label: label.serialize(), } } @@ -79,20 +79,20 @@ impl FromStr for ChunkMeta { #[cfg(test)] mod test { - use super::{Checksum, ChunkMeta}; + use super::{ChunkMeta, Label}; #[test] fn new_creates_data_chunk() { - let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let sum = Label::sha256(b"abcdef"); let meta = ChunkMeta::new(&sum); - assert_eq!(meta.label(), "abcdef"); + assert_eq!(meta.label(), sum.serialize()); } #[test] fn new_generation_creates_generation_chunk() { - let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let sum = Label::sha256(b"abcdef"); let meta = ChunkMeta::new(&sum); - assert_eq!(meta.label(), "abcdef"); + assert_eq!(meta.label(), sum.serialize()); } #[test] @@ -113,7 +113,7 @@ mod test { #[test] fn generation_json_roundtrip() { - let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let sum = Label::sha256(b"abcdef"); let meta = ChunkMeta::new(&sum); let json = serde_json::to_string(&meta).unwrap(); let meta2 = serde_json::from_str(&json).unwrap(); @@ -122,7 +122,7 @@ mod test { #[test] fn data_json_roundtrip() { - let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let sum = Label::sha256(b"abcdef"); let meta = ChunkMeta::new(&sum); let json = meta.to_json_vec(); let meta2 = serde_json::from_slice(&json).unwrap(); diff --git a/src/cipher.rs b/src/cipher.rs index ee7fb8f..7bd2e84 100644 --- a/src/cipher.rs +++ b/src/cipher.rs @@ -191,15 +191,15 @@ impl Nonce { #[cfg(test)] mod test { - use crate::checksummer::Checksum; use crate::chunk::DataChunk; use crate::chunkmeta::ChunkMeta; use crate::cipher::{CipherEngine, CipherError, CHUNK_V1, NONCE_SIZE}; + use crate::label::Label; use crate::passwords::Passwords; #[test] fn metadata_as_aad() { - let sum = Checksum::sha256_from_str_unchecked("dummy-checksum"); + let sum = Label::sha256(b"dummy data"); let meta = ChunkMeta::new(&sum); let meta_as_aad 
= meta.to_json_vec(); let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta); @@ -212,7 +212,7 @@ mod test { #[test] fn round_trip() { - let sum = Checksum::sha256_from_str_unchecked("dummy-checksum"); + let sum = Label::sha256(b"dummy data"); let meta = ChunkMeta::new(&sum); let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta); let pass = Passwords::new("secret"); diff --git a/src/client.rs b/src/client.rs index c5d66c1..bed5f1e 100644 --- a/src/client.rs +++ b/src/client.rs @@ -9,6 +9,7 @@ use crate::cipher::{CipherEngine, CipherError}; use crate::config::{ClientConfig, ClientConfigError}; use crate::generation::{FinishedGeneration, GenId, LocalGeneration, LocalGenerationError}; use crate::genlist::GenerationList; +use crate::label::Label; use log::{debug, error, info}; use reqwest::header::HeaderMap; @@ -195,7 +196,8 @@ impl BackupClient { } async fn find_client_trusts(&self) -> Result<Vec<ChunkId>, ClientError> { - let body = match self.get("", &[("label", "client-trust")]).await { + let label = Label::literal("client-trust").serialize(); + let body = match self.get("", &[("label", &label)]).await { Ok((_, body)) => body, Err(err) => return Err(err), }; diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index 8a85703..60045cc 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -19,7 +19,11 @@ use tokio::runtime::Runtime; /// Make a backup. #[derive(Debug, StructOpt)] pub struct Backup { - /// Backup schema major version. + /// Force a full backup, instead of an incremental one. + #[structopt(long)] + full: bool, + + /// Backup schema major version to use. 
#[structopt(long)] backup_version: Option<VersionComponent>, } @@ -53,29 +57,35 @@ impl Backup { let oldtemp = temp.path().join("old.db"); let newtemp = temp.path().join("new.db"); - let (is_incremental, outcome) = match genlist.resolve("latest") { - Err(_) => { - info!("fresh backup without a previous generation"); - let mut run = BackupRun::initial(config, &client)?; - let old = run.start(None, &oldtemp, perf).await?; - ( - false, - run.backup_roots(config, &old, &newtemp, schema, perf) - .await?, - ) - } - Ok(old_id) => { - info!("incremental backup based on {}", old_id); - let mut run = BackupRun::incremental(config, &client)?; - let old = run.start(Some(&old_id), &oldtemp, perf).await?; - ( - true, - run.backup_roots(config, &old, &newtemp, schema, perf) - .await?, - ) + let old_id = if self.full { + None + } else { + match genlist.resolve("latest") { + Err(_) => None, + Ok(old_id) => Some(old_id), } }; + let (is_incremental, outcome) = if let Some(old_id) = old_id { + info!("incremental backup based on {}", old_id); + let mut run = BackupRun::incremental(config, &client)?; + let old = run.start(Some(&old_id), &oldtemp, perf).await?; + ( + true, + run.backup_roots(config, &old, &newtemp, schema, perf) + .await?, + ) + } else { + info!("fresh backup without a previous generation"); + let mut run = BackupRun::initial(config, &client)?; + let old = run.start(None, &oldtemp, perf).await?; + ( + false, + run.backup_roots(config, &old, &newtemp, schema, perf) + .await?, + ) + }; + perf.start(Clock::GenerationUpload); let mut trust = trust; trust.append_backup(outcome.gen_id.as_chunk_id()); diff --git a/src/dbgen.rs b/src/dbgen.rs index 816ea11..8e5ece5 100644 --- a/src/dbgen.rs +++ b/src/dbgen.rs @@ -5,6 +5,7 @@ use crate::chunkid::ChunkId; use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value}; use crate::fsentry::FilesystemEntry; use crate::genmeta::{GenerationMeta, GenerationMetaError}; +use crate::label::LabelChecksumKind; use 
crate::schema::{SchemaVersion, VersionComponent}; use log::error; use std::collections::HashMap; @@ -90,14 +91,15 @@ impl GenerationDb { pub fn create<P: AsRef<Path>>( filename: P, schema: SchemaVersion, + checksum_kind: LabelChecksumKind, ) -> Result<Self, GenerationDbError> { let meta_table = Self::meta_table(); let variant = match schema.version() { (V0_0::MAJOR, V0_0::MINOR) => { - GenerationDbVariant::V0_0(V0_0::create(filename, meta_table)?) + GenerationDbVariant::V0_0(V0_0::create(filename, meta_table, checksum_kind)?) } (V1_0::MAJOR, V1_0::MINOR) => { - GenerationDbVariant::V1_0(V1_0::create(filename, meta_table)?) + GenerationDbVariant::V1_0(V1_0::create(filename, meta_table, checksum_kind)?) } (major, minor) => return Err(GenerationDbError::Incompatible(major, minor)), }; @@ -240,11 +242,15 @@ impl V0_0 { const MINOR: VersionComponent = 0; /// Create a new generation database in read/write mode. - pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> { + pub fn create<P: AsRef<Path>>( + filename: P, + meta: Table, + checksum_kind: LabelChecksumKind, + ) -> Result<Self, GenerationDbError> { let db = Database::create(filename.as_ref())?; let mut moi = Self::new(db, meta); moi.created = true; - moi.create_tables()?; + moi.create_tables(checksum_kind)?; Ok(moi) } @@ -276,7 +282,7 @@ impl V0_0 { } } - fn create_tables(&mut self) -> Result<(), GenerationDbError> { + fn create_tables(&mut self, checksum_kind: LabelChecksumKind) -> Result<(), GenerationDbError> { self.db.create_table(&self.meta)?; self.db.create_table(&self.files)?; self.db.create_table(&self.chunks)?; @@ -295,6 +301,13 @@ impl V0_0 { Value::text("value", &format!("{}", Self::MINOR)), ], )?; + self.db.insert( + &self.meta, + &[ + Value::text("key", "checksum_kind"), + Value::text("value", checksum_kind.serialize()), + ], + )?; Ok(()) } @@ -483,11 +496,15 @@ impl V1_0 { const MINOR: VersionComponent = 0; /// Create a new generation database in read/write mode. 
- pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> { + pub fn create<P: AsRef<Path>>( + filename: P, + meta: Table, + checksum_kind: LabelChecksumKind, + ) -> Result<Self, GenerationDbError> { let db = Database::create(filename.as_ref())?; let mut moi = Self::new(db, meta); moi.created = true; - moi.create_tables()?; + moi.create_tables(checksum_kind)?; Ok(moi) } @@ -519,7 +536,7 @@ impl V1_0 { } } - fn create_tables(&mut self) -> Result<(), GenerationDbError> { + fn create_tables(&mut self, checksum_kind: LabelChecksumKind) -> Result<(), GenerationDbError> { self.db.create_table(&self.meta)?; self.db.create_table(&self.files)?; self.db.create_table(&self.chunks)?; @@ -538,6 +555,13 @@ impl V1_0 { Value::text("value", &format!("{}", Self::MINOR)), ], )?; + self.db.insert( + &self.meta, + &[ + Value::text("key", "checksum_kind"), + Value::text("value", checksum_kind.serialize()), + ], + )?; Ok(()) } diff --git a/src/error.rs b/src/error.rs index 9c9b432..928f258 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,7 @@ use crate::db::DatabaseError; use crate::dbgen::GenerationDbError; use crate::generation::{LocalGenerationError, NascentError}; use crate::genlist::GenerationListError; +use crate::label::LabelError; use crate::passwords::PasswordError; use std::path::PathBuf; use std::time::SystemTimeError; @@ -22,6 +23,10 @@ use tempfile::PersistError; /// convenience. #[derive(Debug, thiserror::Error)] pub enum ObnamError { + /// Error from chunk labels. + #[error(transparent)] + Label(#[from] LabelError), + /// Error listing generations on server. 
#[error(transparent)] GenerationListError(#[from] GenerationListError), diff --git a/src/generation.rs b/src/generation.rs index 715b426..0a0fc77 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -6,6 +6,7 @@ use crate::db::{DatabaseError, SqlResults}; use crate::dbgen::{FileId, GenerationDb, GenerationDbError}; use crate::fsentry::FilesystemEntry; use crate::genmeta::{GenerationMeta, GenerationMetaError}; +use crate::label::LabelChecksumKind; use crate::schema::{SchemaVersion, VersionComponent}; use std::fmt; use std::path::{Path, PathBuf}; @@ -77,11 +78,15 @@ pub enum NascentError { impl NascentGeneration { /// Create a new nascent generation. - pub fn create<P>(filename: P, schema: SchemaVersion) -> Result<Self, NascentError> + pub fn create<P>( + filename: P, + schema: SchemaVersion, + checksum_kind: LabelChecksumKind, + ) -> Result<Self, NascentError> where P: AsRef<Path>, { - let db = GenerationDb::create(filename.as_ref(), schema)?; + let db = GenerationDb::create(filename.as_ref(), schema, checksum_kind)?; Ok(Self { db, fileno: 0 }) } @@ -290,7 +295,7 @@ impl LocalGeneration { #[cfg(test)] mod test { - use super::{LocalGeneration, NascentGeneration, SchemaVersion}; + use super::{LabelChecksumKind, LocalGeneration, NascentGeneration, SchemaVersion}; use tempfile::NamedTempFile; #[test] @@ -298,7 +303,8 @@ mod test { let filename = NamedTempFile::new().unwrap().path().to_path_buf(); let schema = SchemaVersion::new(0, 0); { - let mut _gen = NascentGeneration::create(&filename, schema).unwrap(); + let mut _gen = + NascentGeneration::create(&filename, schema, LabelChecksumKind::Sha256).unwrap(); // _gen is dropped here; the connection is close; the file // should not be removed. 
} @@ -328,7 +334,8 @@ mod test { let tag_path2 = Path::new("/another_dir/a_tag"); let schema = SchemaVersion::new(0, 0); - let mut gen = NascentGeneration::create(&dbfile, schema).unwrap(); + let mut gen = + NascentGeneration::create(&dbfile, schema, LabelChecksumKind::Sha256).unwrap(); let mut cache = users::UsersCache::new(); gen.insert( diff --git a/src/genmeta.rs b/src/genmeta.rs index 2ce4c4c..d5b14a3 100644 --- a/src/genmeta.rs +++ b/src/genmeta.rs @@ -26,6 +26,11 @@ impl GenerationMeta { pub fn schema_version(&self) -> SchemaVersion { self.schema_version } + + /// Get a value corresponding to a key in the meta table. + pub fn get(&self, key: &str) -> Option<&String> { + self.extras.get(key) + } } fn metastr(map: &mut HashMap<String, String>, key: &str) -> Result<String, GenerationMetaError> { diff --git a/src/index.rs b/src/index.rs index 11f3480..52da2f2 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,8 +1,8 @@ //! An on-disk index of chunks for the server. -use crate::checksummer::Checksum; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; +use crate::label::Label; use rusqlite::Connection; use std::path::Path; @@ -74,7 +74,7 @@ impl Index { #[cfg(test)] mod test { - use crate::checksummer::Checksum; + use super::Label; use super::{ChunkId, ChunkMeta, Index}; use std::path::Path; @@ -87,20 +87,20 @@ mod test { #[test] fn remembers_inserted() { let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); + let sum = Label::sha256(b"abc"); let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta.clone()).unwrap(); assert_eq!(idx.get_meta(&id).unwrap(), meta); - let ids = idx.find_by_label("abc").unwrap(); + let ids = idx.find_by_label(&sum.serialize()).unwrap(); assert_eq!(ids, vec![id]); } #[test] fn does_not_find_uninserted() { let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); + 
let sum = Label::sha256(b"abc"); let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); @@ -111,19 +111,19 @@ mod test { #[test] fn removes_inserted() { let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); + let sum = Label::sha256(b"abc"); let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); idx.remove_meta(&id).unwrap(); - let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap(); + let ids: Vec<ChunkId> = idx.find_by_label(&sum.serialize()).unwrap(); assert_eq!(ids, vec![]); } } mod sql { - use super::{Checksum, IndexError}; + use super::{IndexError, Label}; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use log::error; @@ -216,7 +216,7 @@ mod sql { fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> { let hash: String = row.get("label")?; - let sha256 = Checksum::sha256_from_str_unchecked(&hash); + let sha256 = Label::deserialize(&hash).expect("deserialize checksum from database"); Ok(ChunkMeta::new(&sha256)) } diff --git a/src/label.rs b/src/label.rs new file mode 100644 index 0000000..19d270a --- /dev/null +++ b/src/label.rs @@ -0,0 +1,138 @@ +//! A chunk label. +//! +//! De-duplication of backed up data in Obnam relies on cryptographic +//! checksums. They are implemented in this module. Note that Obnam +//! does not aim to make these algorithms configurable, so only a very +//! small number of carefully chosen algorithms are supported here. + +use blake2::Blake2s256; +use sha2::{Digest, Sha256}; + +const LITERAL: char = '0'; +const SHA256: char = '1'; +const BLAKE2: char = '2'; + +/// A checksum of some data. +#[derive(Debug, Clone)] +pub enum Label { + /// An arbitrary, literal string. + Literal(String), + + /// A SHA256 checksum. + Sha256(String), + + /// A BLAKE2s checksum. + Blake2(String), +} + +impl Label { + /// Construct a literal string. 
+ pub fn literal(s: &str) -> Self { + Self::Literal(s.to_string()) + } + + /// Compute a SHA256 checksum for a block of data. + pub fn sha256(data: &[u8]) -> Self { + let mut hasher = Sha256::new(); + hasher.update(data); + let hash = hasher.finalize(); + Self::Sha256(format!("{:x}", hash)) + } + + /// Compute a BLAKE2s checksum for a block of data. + pub fn blake2(data: &[u8]) -> Self { + let mut hasher = Blake2s256::new(); + hasher.update(data); + let hash = hasher.finalize(); + Self::Blake2(format!("{:x}", hash)) + } + + /// Serialize a label into a string representation. + pub fn serialize(&self) -> String { + match self { + Self::Literal(s) => format!("{}{}", LITERAL, s), + Self::Sha256(hash) => format!("{}{}", SHA256, hash), + Self::Blake2(hash) => format!("{}{}", BLAKE2, hash), + } + } + + /// De-serialize a label from its string representation. + pub fn deserialize(s: &str) -> Result<Self, LabelError> { + if s.starts_with(LITERAL) { + Ok(Self::Literal(s[1..].to_string())) + } else if s.starts_with(SHA256) { + Ok(Self::Sha256(s[1..].to_string())) + } else if s.starts_with(BLAKE2) { + Ok(Self::Blake2(s[1..].to_string())) + } else { + Err(LabelError::UnknownType(s.to_string())) + } + } +} + +/// Kinds of checksum labels. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum LabelChecksumKind { + /// Use a Blake2 checksum. + Blake2, + + /// Use a SHA256 checksum. + Sha256, +} + +impl LabelChecksumKind { + /// Parse a string into a label checksum kind. + pub fn from(s: &str) -> Result<Self, LabelError> { + if s == "sha256" { + Ok(Self::Sha256) + } else if s == "blake2" { + Ok(Self::Blake2) + } else { + Err(LabelError::UnknownType(s.to_string())) + } + } + + /// Serialize a checksum kind into a string. + pub fn serialize(self) -> &'static str { + match self { + Self::Sha256 => "sha256", + Self::Blake2 => "blake2", + } + } +} + +/// Possible errors from dealing with chunk labels. +#[derive(Debug, thiserror::Error)] +pub enum LabelError { + /// Serialized label didn't start with a known type prefix. 
+ #[error("Unknown label: {0:?}")] + UnknownType(String), +} + +#[cfg(test)] +mod test { + use super::{Label, LabelChecksumKind}; + + #[test] + fn roundtrip_literal() { + let label = Label::literal("dummy data"); + let serialized = label.serialize(); + let de = Label::deserialize(&serialized).unwrap(); + let seri2 = de.serialize(); + assert_eq!(serialized, seri2); + } + + #[test] + fn roundtrip_sha256() { + let label = Label::sha256(b"dummy data"); + let serialized = label.serialize(); + let de = Label::deserialize(&serialized).unwrap(); + let seri2 = de.serialize(); + assert_eq!(serialized, seri2); + } + + #[test] + fn roundtrip_checksum_kind() { + for kind in [LabelChecksumKind::Sha256, LabelChecksumKind::Blake2] { + assert_eq!(LabelChecksumKind::from(kind.serialize()).unwrap(), kind); + } + } +} @@ -9,7 +9,6 @@ pub mod accumulated_time; pub mod backup_progress; pub mod backup_reason; pub mod backup_run; -pub mod checksummer; pub mod chunk; pub mod chunker; pub mod chunkid; @@ -29,6 +28,7 @@ pub mod genlist; pub mod genmeta; pub mod index; pub mod indexedstore; +pub mod label; pub mod passwords; pub mod performance; pub mod policy; diff --git a/src/server.rs b/src/server.rs index 31a03fc..6b688d6 100644 --- a/src/server.rs +++ b/src/server.rs @@ -141,7 +141,7 @@ impl SearchHits { #[cfg(test)] mod test_search_hits { use super::{ChunkMeta, SearchHits}; - use crate::checksummer::Checksum; + use crate::label::Label; #[test] fn no_search_hits() { @@ -152,7 +152,7 @@ mod test_search_hits { #[test] fn one_search_hit() { let id = "abc".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("123"); + let sum = Label::sha256(b"123"); let meta = ChunkMeta::new(&sum); let mut hits = SearchHits::default(); hits.insert(id, meta); |