summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock10
-rw-r--r--Cargo.toml1
-rw-r--r--obnam.md24
-rw-r--r--src/backup_run.rs22
-rw-r--r--src/checksummer.rs41
-rw-r--r--src/chunk.rs6
-rw-r--r--src/chunker.rs16
-rw-r--r--src/chunkid.rs6
-rw-r--r--src/chunkmeta.rs20
-rw-r--r--src/cipher.rs6
-rw-r--r--src/client.rs4
-rw-r--r--src/cmd/backup.rs52
-rw-r--r--src/dbgen.rs40
-rw-r--r--src/error.rs5
-rw-r--r--src/generation.rs17
-rw-r--r--src/genmeta.rs5
-rw-r--r--src/index.rs18
-rw-r--r--src/label.rs138
-rw-r--r--src/lib.rs2
-rw-r--r--src/server.rs4
20 files changed, 313 insertions, 124 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 9dafc07..7715bf3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -114,6 +114,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
+name = "blake2"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388"
+dependencies = [
+ "digest 0.10.3",
+]
+
+[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -977,6 +986,7 @@ version = "0.7.1"
dependencies = [
"aes-gcm",
"anyhow",
+ "blake2",
"bytesize",
"chrono",
"directories-next",
diff --git a/Cargo.toml b/Cargo.toml
index 616a34a..36f1682 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ rust-version = "1.56.0"
[dependencies]
aes-gcm = "0.9"
anyhow = "1"
+blake2 = "0.10.4"
bytesize = "1"
chrono = "0.4"
directories-next = "2"
diff --git a/obnam.md b/obnam.md
index a02aacb..a6f9b4c 100644
--- a/obnam.md
+++ b/obnam.md
@@ -1092,7 +1092,7 @@ storage of backed up data.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"}
+when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1105,17 +1105,17 @@ We must be able to retrieve it.
when I GET /v1/chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"label":"abc"}
+and chunk-meta is {"label":"0abc"}
and the body matches file data.dat
~~~
We must also be able to find it based on metadata.
~~~scenario
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"label":"abc"}}
+and the JSON body matches {"<ID>":{"label":"0abc"}}
~~~
Finally, we must be able to delete it. After that, we must not be able
@@ -1128,7 +1128,7 @@ then HTTP status code is 200
when I GET /v1/chunks/<ID>
then HTTP status code is 404
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1151,7 +1151,7 @@ We must get an empty result if searching for chunks that don't exist.
~~~scenario
given a working Obnam system
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1178,7 +1178,7 @@ First, create a chunk.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"}
+when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1194,10 +1194,10 @@ given a running chunk server
Can we still find it by its metadata?
~~~scenario
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"label":"abc"}}
+and the JSON body matches {"<ID>":{"label":"0abc"}}
~~~
Can we still retrieve it by its identifier?
@@ -1206,7 +1206,7 @@ Can we still retrieve it by its identifier?
when I GET /v1/chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"label":"abc"}
+and chunk-meta is {"label":"0abc"}
and the body matches file data.dat
~~~
@@ -1901,7 +1901,9 @@ then stdout, as JSON, has all the values in file geninfo.json
"major": 0,
"minor": 0
},
- "extras": {}
+ "extras": {
+ "checksum_kind": "sha256"
+ }
}
~~~
diff --git a/src/backup_run.rs b/src/backup_run.rs
index 29e82fc..2418871 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -15,6 +15,7 @@ use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator};
use crate::generation::{
GenId, LocalGeneration, LocalGenerationError, NascentError, NascentGeneration,
};
+use crate::label::LabelChecksumKind;
use crate::performance::{Clock, Performance};
use crate::policy::BackupPolicy;
use crate::schema::SchemaVersion;
@@ -24,10 +25,12 @@ use chrono::{DateTime, Local};
use log::{debug, error, info, warn};
use std::path::{Path, PathBuf};
+const DEFAULT_CHECKSUM_KIND: LabelChecksumKind = LabelChecksumKind::Sha256;
const SQLITE_CHUNK_SIZE: usize = MIB as usize;
/// A running backup.
pub struct BackupRun<'a> {
+ checksum_kind: Option<LabelChecksumKind>,
client: &'a BackupClient,
policy: BackupPolicy,
buffer_size: usize,
@@ -105,6 +108,7 @@ impl<'a> BackupRun<'a> {
/// Create a new run for an initial backup.
pub fn initial(config: &ClientConfig, client: &'a BackupClient) -> Result<Self, BackupError> {
Ok(Self {
+ checksum_kind: Some(DEFAULT_CHECKSUM_KIND),
client,
policy: BackupPolicy::default(),
buffer_size: config.chunk_size,
@@ -118,6 +122,7 @@ impl<'a> BackupRun<'a> {
client: &'a BackupClient,
) -> Result<Self, BackupError> {
Ok(Self {
+ checksum_kind: None,
client,
policy: BackupPolicy::default(),
buffer_size: config.chunk_size,
@@ -136,7 +141,7 @@ impl<'a> BackupRun<'a> {
None => {
// Create a new, empty generation.
let schema = schema_version(DEFAULT_SCHEMA_MAJOR).unwrap();
- NascentGeneration::create(oldname, schema)?.close()?;
+ NascentGeneration::create(oldname, schema, self.checksum_kind.unwrap())?.close()?;
// Open the newly created empty generation.
Ok(LocalGeneration::open(oldname)?)
@@ -146,6 +151,11 @@ impl<'a> BackupRun<'a> {
let old = self.fetch_previous_generation(genid, oldname).await?;
perf.stop(Clock::GenerationDownload);
+ let meta = old.meta()?;
+ if let Some(v) = meta.get("checksum_kind") {
+ self.checksum_kind = Some(LabelChecksumKind::from(v)?);
+ }
+
let progress = BackupProgress::incremental();
progress.files_in_previous_generation(old.file_count()? as u64);
self.progress = Some(progress);
@@ -155,6 +165,12 @@ impl<'a> BackupRun<'a> {
}
}
+ fn checksum_kind(&self) -> LabelChecksumKind {
+ self.checksum_kind
+ .or(Some(LabelChecksumKind::Sha256))
+ .unwrap()
+ }
+
async fn fetch_previous_generation(
&self,
genid: &GenId,
@@ -185,7 +201,7 @@ impl<'a> BackupRun<'a> {
let mut warnings: Vec<BackupError> = vec![];
let mut new_cachedir_tags = vec![];
let files_count = {
- let mut new = NascentGeneration::create(newpath, schema)?;
+ let mut new = NascentGeneration::create(newpath, schema, self.checksum_kind.unwrap())?;
for root in &config.roots {
match self.backup_one_root(config, old, &mut new, root).await {
Ok(mut o) => {
@@ -378,7 +394,7 @@ impl<'a> BackupRun<'a> {
let mut chunk_ids = vec![];
let file = std::fs::File::open(filename)
.map_err(|err| ClientError::FileOpen(filename.to_path_buf(), err))?;
- let chunker = FileChunks::new(size, file, filename);
+ let chunker = FileChunks::new(size, file, filename, self.checksum_kind());
for item in chunker {
let chunk = item?;
if let Some(chunk_id) = self.client.has_chunk(chunk.meta()).await? {
diff --git a/src/checksummer.rs b/src/checksummer.rs
deleted file mode 100644
index 50bce04..0000000
--- a/src/checksummer.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-//! Compute checksums of data.
-//!
-//! De-duplication of backed up data in Obnam relies on cryptographic
-//! checksums. They are implemented in this module. Note that Obnam
-//! does not aim to make these algorithms configurable, so only a very
-//! small number of carefully chosen algorithms are supported here.
-
-use sha2::{Digest, Sha256};
-use std::fmt;
-
-/// A checksum of some data.
-#[derive(Debug, Clone)]
-pub enum Checksum {
- /// A SHA256 checksum.
- Sha256(String),
-}
-
-impl Checksum {
- /// Compute a SHA256 checksum for a block of data.
- pub fn sha256(data: &[u8]) -> Self {
- let mut hasher = Sha256::new();
- hasher.update(data);
- let hash = hasher.finalize();
- Self::Sha256(format!("{:x}", hash))
- }
-
- /// Create a `Checksum` from a known, previously computed hash.
- pub fn sha256_from_str_unchecked(hash: &str) -> Self {
- Self::Sha256(hash.to_string())
- }
-}
-
-impl fmt::Display for Checksum {
- /// Format a checksum for display.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let hash = match self {
- Self::Sha256(hash) => hash,
- };
- write!(f, "{}", hash)
- }
-}
diff --git a/src/chunk.rs b/src/chunk.rs
index 27a3ab9..4f604b9 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -1,8 +1,8 @@
//! Chunks of data.
-use crate::checksummer::Checksum;
use crate::chunkid::ChunkId;
use crate::chunkmeta::ChunkMeta;
+use crate::label::Label;
use serde::{Deserialize, Serialize};
use std::default::Default;
@@ -97,7 +97,7 @@ impl GenerationChunk {
let json: String =
serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?;
let bytes = json.as_bytes().to_vec();
- let checksum = Checksum::sha256(&bytes);
+ let checksum = Label::sha256(&bytes);
let meta = ChunkMeta::new(&checksum);
Ok(DataChunk::new(bytes, meta))
}
@@ -185,7 +185,7 @@ impl ClientTrust {
pub fn to_data_chunk(&self) -> Result<DataChunk, ClientTrustError> {
let json: String = serde_json::to_string(self).map_err(ClientTrustError::JsonGenerate)?;
let bytes = json.as_bytes().to_vec();
- let checksum = Checksum::sha256_from_str_unchecked("client-trust");
+ let checksum = Label::literal("client-trust");
let meta = ChunkMeta::new(&checksum);
Ok(DataChunk::new(bytes, meta))
}
diff --git a/src/chunker.rs b/src/chunker.rs
index 7954621..29f8a90 100644
--- a/src/chunker.rs
+++ b/src/chunker.rs
@@ -1,14 +1,15 @@
//! Split file data into chunks.
-use crate::checksummer::Checksum;
use crate::chunk::DataChunk;
use crate::chunkmeta::ChunkMeta;
+use crate::label::{Label, LabelChecksumKind};
use std::io::prelude::*;
use std::path::{Path, PathBuf};
/// Iterator over chunks in a file.
pub struct FileChunks {
chunk_size: usize,
+ kind: LabelChecksumKind,
buf: Vec<u8>,
filename: PathBuf,
handle: std::fs::File,
@@ -24,11 +25,17 @@ pub enum ChunkerError {
impl FileChunks {
/// Create new iterator.
- pub fn new(chunk_size: usize, handle: std::fs::File, filename: &Path) -> Self {
+ pub fn new(
+ chunk_size: usize,
+ handle: std::fs::File,
+ filename: &Path,
+ kind: LabelChecksumKind,
+ ) -> Self {
let mut buf = vec![];
buf.resize(chunk_size, 0);
Self {
chunk_size,
+ kind,
buf,
handle,
filename: filename.to_path_buf(),
@@ -54,7 +61,10 @@ impl FileChunks {
}
let buffer = &self.buf.as_slice()[..used];
- let hash = Checksum::sha256(buffer);
+ let hash = match self.kind {
+ LabelChecksumKind::Blake2 => Label::blake2(buffer),
+ LabelChecksumKind::Sha256 => Label::sha256(buffer),
+ };
let meta = ChunkMeta::new(&hash);
let chunk = DataChunk::new(buffer.to_vec(), meta);
Ok(Some(chunk))
diff --git a/src/chunkid.rs b/src/chunkid.rs
index 3534627..50fc3d3 100644
--- a/src/chunkid.rs
+++ b/src/chunkid.rs
@@ -3,7 +3,7 @@
//! Chunk identifiers are chosen by the server. Each chunk has a
//! unique identifier, which isn't based on the contents of the chunk.
-use crate::checksummer::Checksum;
+use crate::label::Label;
use rusqlite::types::ToSqlOutput;
use rusqlite::ToSql;
use serde::{Deserialize, Serialize};
@@ -53,8 +53,8 @@ impl ChunkId {
}
/// Return the SHA256 checksum of the identifier.
- pub fn sha256(&self) -> Checksum {
- Checksum::sha256(self.id.as_bytes())
+ pub fn sha256(&self) -> Label {
+ Label::sha256(self.id.as_bytes())
}
}
diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs
index 33c1070..fe7ef4c 100644
--- a/src/chunkmeta.rs
+++ b/src/chunkmeta.rs
@@ -1,6 +1,6 @@
//! Metadata about a chunk.
-use crate::checksummer::Checksum;
+use crate::label::Label;
use serde::{Deserialize, Serialize};
use std::default::Default;
use std::str::FromStr;
@@ -37,9 +37,9 @@ impl ChunkMeta {
/// Create a new data chunk.
///
/// Data chunks are not for generations.
- pub fn new(checksum: &Checksum) -> Self {
+ pub fn new(label: &Label) -> Self {
ChunkMeta {
- label: checksum.to_string(),
+ label: label.serialize(),
}
}
@@ -79,20 +79,20 @@ impl FromStr for ChunkMeta {
#[cfg(test)]
mod test {
- use super::{Checksum, ChunkMeta};
+ use super::{ChunkMeta, Label};
#[test]
fn new_creates_data_chunk() {
- let sum = Checksum::sha256_from_str_unchecked("abcdef");
+ let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
- assert_eq!(meta.label(), "abcdef");
+ assert_eq!(meta.label(), sum.serialize());
}
#[test]
fn new_generation_creates_generation_chunk() {
- let sum = Checksum::sha256_from_str_unchecked("abcdef");
+ let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
- assert_eq!(meta.label(), "abcdef");
+ assert_eq!(meta.label(), sum.serialize());
}
#[test]
@@ -113,7 +113,7 @@ mod test {
#[test]
fn generation_json_roundtrip() {
- let sum = Checksum::sha256_from_str_unchecked("abcdef");
+ let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
let json = serde_json::to_string(&meta).unwrap();
let meta2 = serde_json::from_str(&json).unwrap();
@@ -122,7 +122,7 @@ mod test {
#[test]
fn data_json_roundtrip() {
- let sum = Checksum::sha256_from_str_unchecked("abcdef");
+ let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
let json = meta.to_json_vec();
let meta2 = serde_json::from_slice(&json).unwrap();
diff --git a/src/cipher.rs b/src/cipher.rs
index ee7fb8f..7bd2e84 100644
--- a/src/cipher.rs
+++ b/src/cipher.rs
@@ -191,15 +191,15 @@ impl Nonce {
#[cfg(test)]
mod test {
- use crate::checksummer::Checksum;
use crate::chunk::DataChunk;
use crate::chunkmeta::ChunkMeta;
use crate::cipher::{CipherEngine, CipherError, CHUNK_V1, NONCE_SIZE};
+ use crate::label::Label;
use crate::passwords::Passwords;
#[test]
fn metadata_as_aad() {
- let sum = Checksum::sha256_from_str_unchecked("dummy-checksum");
+ let sum = Label::sha256(b"dummy data");
let meta = ChunkMeta::new(&sum);
let meta_as_aad = meta.to_json_vec();
let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta);
@@ -212,7 +212,7 @@ mod test {
#[test]
fn round_trip() {
- let sum = Checksum::sha256_from_str_unchecked("dummy-checksum");
+ let sum = Label::sha256(b"dummy data");
let meta = ChunkMeta::new(&sum);
let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta);
let pass = Passwords::new("secret");
diff --git a/src/client.rs b/src/client.rs
index c5d66c1..bed5f1e 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -9,6 +9,7 @@ use crate::cipher::{CipherEngine, CipherError};
use crate::config::{ClientConfig, ClientConfigError};
use crate::generation::{FinishedGeneration, GenId, LocalGeneration, LocalGenerationError};
use crate::genlist::GenerationList;
+use crate::label::Label;
use log::{debug, error, info};
use reqwest::header::HeaderMap;
@@ -195,7 +196,8 @@ impl BackupClient {
}
async fn find_client_trusts(&self) -> Result<Vec<ChunkId>, ClientError> {
- let body = match self.get("", &[("label", "client-trust")]).await {
+ let label = Label::literal("client-trust").serialize();
+ let body = match self.get("", &[("label", &label)]).await {
Ok((_, body)) => body,
Err(err) => return Err(err),
};
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index 8a85703..60045cc 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -19,7 +19,11 @@ use tokio::runtime::Runtime;
/// Make a backup.
#[derive(Debug, StructOpt)]
pub struct Backup {
- /// Backup schema major version.
+ /// Force a full backup, instead of an incremental one.
+ #[structopt(long)]
+ full: bool,
+
+ /// Backup schema major version to use.
#[structopt(long)]
backup_version: Option<VersionComponent>,
}
@@ -53,29 +57,35 @@ impl Backup {
let oldtemp = temp.path().join("old.db");
let newtemp = temp.path().join("new.db");
- let (is_incremental, outcome) = match genlist.resolve("latest") {
- Err(_) => {
- info!("fresh backup without a previous generation");
- let mut run = BackupRun::initial(config, &client)?;
- let old = run.start(None, &oldtemp, perf).await?;
- (
- false,
- run.backup_roots(config, &old, &newtemp, schema, perf)
- .await?,
- )
- }
- Ok(old_id) => {
- info!("incremental backup based on {}", old_id);
- let mut run = BackupRun::incremental(config, &client)?;
- let old = run.start(Some(&old_id), &oldtemp, perf).await?;
- (
- true,
- run.backup_roots(config, &old, &newtemp, schema, perf)
- .await?,
- )
+ let old_id = if self.full {
+ None
+ } else {
+ match genlist.resolve("latest") {
+ Err(_) => None,
+ Ok(old_id) => Some(old_id),
}
};
+ let (is_incremental, outcome) = if let Some(old_id) = old_id {
+ info!("incremental backup based on {}", old_id);
+ let mut run = BackupRun::incremental(config, &client)?;
+ let old = run.start(Some(&old_id), &oldtemp, perf).await?;
+ (
+ true,
+ run.backup_roots(config, &old, &newtemp, schema, perf)
+ .await?,
+ )
+ } else {
+ info!("fresh backup without a previous generation");
+ let mut run = BackupRun::initial(config, &client)?;
+ let old = run.start(None, &oldtemp, perf).await?;
+ (
+ false,
+ run.backup_roots(config, &old, &newtemp, schema, perf)
+ .await?,
+ )
+ };
+
perf.start(Clock::GenerationUpload);
let mut trust = trust;
trust.append_backup(outcome.gen_id.as_chunk_id());
diff --git a/src/dbgen.rs b/src/dbgen.rs
index 816ea11..8e5ece5 100644
--- a/src/dbgen.rs
+++ b/src/dbgen.rs
@@ -5,6 +5,7 @@ use crate::chunkid::ChunkId;
use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value};
use crate::fsentry::FilesystemEntry;
use crate::genmeta::{GenerationMeta, GenerationMetaError};
+use crate::label::LabelChecksumKind;
use crate::schema::{SchemaVersion, VersionComponent};
use log::error;
use std::collections::HashMap;
@@ -90,14 +91,15 @@ impl GenerationDb {
pub fn create<P: AsRef<Path>>(
filename: P,
schema: SchemaVersion,
+ checksum_kind: LabelChecksumKind,
) -> Result<Self, GenerationDbError> {
let meta_table = Self::meta_table();
let variant = match schema.version() {
(V0_0::MAJOR, V0_0::MINOR) => {
- GenerationDbVariant::V0_0(V0_0::create(filename, meta_table)?)
+ GenerationDbVariant::V0_0(V0_0::create(filename, meta_table, checksum_kind)?)
}
(V1_0::MAJOR, V1_0::MINOR) => {
- GenerationDbVariant::V1_0(V1_0::create(filename, meta_table)?)
+ GenerationDbVariant::V1_0(V1_0::create(filename, meta_table, checksum_kind)?)
}
(major, minor) => return Err(GenerationDbError::Incompatible(major, minor)),
};
@@ -240,11 +242,15 @@ impl V0_0 {
const MINOR: VersionComponent = 0;
/// Create a new generation database in read/write mode.
- pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
+ pub fn create<P: AsRef<Path>>(
+ filename: P,
+ meta: Table,
+ checksum_kind: LabelChecksumKind,
+ ) -> Result<Self, GenerationDbError> {
let db = Database::create(filename.as_ref())?;
let mut moi = Self::new(db, meta);
moi.created = true;
- moi.create_tables()?;
+ moi.create_tables(checksum_kind)?;
Ok(moi)
}
@@ -276,7 +282,7 @@ impl V0_0 {
}
}
- fn create_tables(&mut self) -> Result<(), GenerationDbError> {
+ fn create_tables(&mut self, checksum_kind: LabelChecksumKind) -> Result<(), GenerationDbError> {
self.db.create_table(&self.meta)?;
self.db.create_table(&self.files)?;
self.db.create_table(&self.chunks)?;
@@ -295,6 +301,13 @@ impl V0_0 {
Value::text("value", &format!("{}", Self::MINOR)),
],
)?;
+ self.db.insert(
+ &self.meta,
+ &[
+ Value::text("key", "checksum_kind"),
+ Value::text("value", checksum_kind.serialize()),
+ ],
+ )?;
Ok(())
}
@@ -483,11 +496,15 @@ impl V1_0 {
const MINOR: VersionComponent = 0;
/// Create a new generation database in read/write mode.
- pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
+ pub fn create<P: AsRef<Path>>(
+ filename: P,
+ meta: Table,
+ checksum_kind: LabelChecksumKind,
+ ) -> Result<Self, GenerationDbError> {
let db = Database::create(filename.as_ref())?;
let mut moi = Self::new(db, meta);
moi.created = true;
- moi.create_tables()?;
+ moi.create_tables(checksum_kind)?;
Ok(moi)
}
@@ -519,7 +536,7 @@ impl V1_0 {
}
}
- fn create_tables(&mut self) -> Result<(), GenerationDbError> {
+ fn create_tables(&mut self, checksum_kind: LabelChecksumKind) -> Result<(), GenerationDbError> {
self.db.create_table(&self.meta)?;
self.db.create_table(&self.files)?;
self.db.create_table(&self.chunks)?;
@@ -538,6 +555,13 @@ impl V1_0 {
Value::text("value", &format!("{}", Self::MINOR)),
],
)?;
+ self.db.insert(
+ &self.meta,
+ &[
+ Value::text("key", "checksum_kind"),
+ Value::text("value", checksum_kind.serialize()),
+ ],
+ )?;
Ok(())
}
diff --git a/src/error.rs b/src/error.rs
index 9c9b432..928f258 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -10,6 +10,7 @@ use crate::db::DatabaseError;
use crate::dbgen::GenerationDbError;
use crate::generation::{LocalGenerationError, NascentError};
use crate::genlist::GenerationListError;
+use crate::label::LabelError;
use crate::passwords::PasswordError;
use std::path::PathBuf;
use std::time::SystemTimeError;
@@ -22,6 +23,10 @@ use tempfile::PersistError;
/// convenience.
#[derive(Debug, thiserror::Error)]
pub enum ObnamError {
+ /// Error from chunk labels.
+ #[error(transparent)]
+ Label(#[from] LabelError),
+
/// Error listing generations on server.
#[error(transparent)]
GenerationListError(#[from] GenerationListError),
diff --git a/src/generation.rs b/src/generation.rs
index 715b426..0a0fc77 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -6,6 +6,7 @@ use crate::db::{DatabaseError, SqlResults};
use crate::dbgen::{FileId, GenerationDb, GenerationDbError};
use crate::fsentry::FilesystemEntry;
use crate::genmeta::{GenerationMeta, GenerationMetaError};
+use crate::label::LabelChecksumKind;
use crate::schema::{SchemaVersion, VersionComponent};
use std::fmt;
use std::path::{Path, PathBuf};
@@ -77,11 +78,15 @@ pub enum NascentError {
impl NascentGeneration {
/// Create a new nascent generation.
- pub fn create<P>(filename: P, schema: SchemaVersion) -> Result<Self, NascentError>
+ pub fn create<P>(
+ filename: P,
+ schema: SchemaVersion,
+ checksum_kind: LabelChecksumKind,
+ ) -> Result<Self, NascentError>
where
P: AsRef<Path>,
{
- let db = GenerationDb::create(filename.as_ref(), schema)?;
+ let db = GenerationDb::create(filename.as_ref(), schema, checksum_kind)?;
Ok(Self { db, fileno: 0 })
}
@@ -290,7 +295,7 @@ impl LocalGeneration {
#[cfg(test)]
mod test {
- use super::{LocalGeneration, NascentGeneration, SchemaVersion};
+ use super::{LabelChecksumKind, LocalGeneration, NascentGeneration, SchemaVersion};
use tempfile::NamedTempFile;
#[test]
@@ -298,7 +303,8 @@ mod test {
let filename = NamedTempFile::new().unwrap().path().to_path_buf();
let schema = SchemaVersion::new(0, 0);
{
- let mut _gen = NascentGeneration::create(&filename, schema).unwrap();
+ let mut _gen =
+ NascentGeneration::create(&filename, schema, LabelChecksumKind::Sha256).unwrap();
// _gen is dropped here; the connection is close; the file
// should not be removed.
}
@@ -328,7 +334,8 @@ mod test {
let tag_path2 = Path::new("/another_dir/a_tag");
let schema = SchemaVersion::new(0, 0);
- let mut gen = NascentGeneration::create(&dbfile, schema).unwrap();
+ let mut gen =
+ NascentGeneration::create(&dbfile, schema, LabelChecksumKind::Sha256).unwrap();
let mut cache = users::UsersCache::new();
gen.insert(
diff --git a/src/genmeta.rs b/src/genmeta.rs
index 2ce4c4c..d5b14a3 100644
--- a/src/genmeta.rs
+++ b/src/genmeta.rs
@@ -26,6 +26,11 @@ impl GenerationMeta {
pub fn schema_version(&self) -> SchemaVersion {
self.schema_version
}
+
+ /// Get a value corresponding to a key in the meta table.
+ pub fn get(&self, key: &str) -> Option<&String> {
+ self.extras.get(key)
+ }
}
fn metastr(map: &mut HashMap<String, String>, key: &str) -> Result<String, GenerationMetaError> {
diff --git a/src/index.rs b/src/index.rs
index 11f3480..52da2f2 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,8 +1,8 @@
//! An on-disk index of chunks for the server.
-use crate::checksummer::Checksum;
use crate::chunkid::ChunkId;
use crate::chunkmeta::ChunkMeta;
+use crate::label::Label;
use rusqlite::Connection;
use std::path::Path;
@@ -74,7 +74,7 @@ impl Index {
#[cfg(test)]
mod test {
- use crate::checksummer::Checksum;
+ use super::Label;
use super::{ChunkId, ChunkMeta, Index};
use std::path::Path;
@@ -87,20 +87,20 @@ mod test {
#[test]
fn remembers_inserted() {
let id: ChunkId = "id001".parse().unwrap();
- let sum = Checksum::sha256_from_str_unchecked("abc");
+ let sum = Label::sha256(b"abc");
let meta = ChunkMeta::new(&sum);
let dir = tempdir().unwrap();
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta.clone()).unwrap();
assert_eq!(idx.get_meta(&id).unwrap(), meta);
- let ids = idx.find_by_label("abc").unwrap();
+ let ids = idx.find_by_label(&sum.serialize()).unwrap();
assert_eq!(ids, vec![id]);
}
#[test]
fn does_not_find_uninserted() {
let id: ChunkId = "id001".parse().unwrap();
- let sum = Checksum::sha256_from_str_unchecked("abc");
+ let sum = Label::sha256(b"abc");
let meta = ChunkMeta::new(&sum);
let dir = tempdir().unwrap();
let mut idx = new_index(dir.path());
@@ -111,19 +111,19 @@ mod test {
#[test]
fn removes_inserted() {
let id: ChunkId = "id001".parse().unwrap();
- let sum = Checksum::sha256_from_str_unchecked("abc");
+ let sum = Label::sha256(b"abc");
let meta = ChunkMeta::new(&sum);
let dir = tempdir().unwrap();
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta).unwrap();
idx.remove_meta(&id).unwrap();
- let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap();
+ let ids: Vec<ChunkId> = idx.find_by_label(&sum.serialize()).unwrap();
assert_eq!(ids, vec![]);
}
}
mod sql {
- use super::{Checksum, IndexError};
+ use super::{IndexError, Label};
use crate::chunkid::ChunkId;
use crate::chunkmeta::ChunkMeta;
use log::error;
@@ -216,7 +216,7 @@ mod sql {
fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> {
let hash: String = row.get("label")?;
- let sha256 = Checksum::sha256_from_str_unchecked(&hash);
+ let sha256 = Label::deserialize(&hash).expect("deserialize checksum from database");
Ok(ChunkMeta::new(&sha256))
}
diff --git a/src/label.rs b/src/label.rs
new file mode 100644
index 0000000..19d270a
--- /dev/null
+++ b/src/label.rs
@@ -0,0 +1,138 @@
+//! A chunk label.
+//!
+//! De-duplication of backed up data in Obnam relies on cryptographic
+//! checksums. They are implemented in this module. Note that Obnam
+//! does not aim to make these algorithms configurable, so only a very
+//! small number of carefully chosen algorithms are supported here.
+
+use blake2::Blake2s256;
+use sha2::{Digest, Sha256};
+
+const LITERAL: char = '0';
+const SHA256: char = '1';
+const BLAKE2: char = '2';
+
+/// A checksum of some data.
+#[derive(Debug, Clone)]
+pub enum Label {
+ /// An arbitrary, literal string.
+ Literal(String),
+
+ /// A SHA256 checksum.
+ Sha256(String),
+
+ /// A BLAKE2s checksum.
+ Blake2(String),
+}
+
+impl Label {
+ /// Construct a literal string.
+ pub fn literal(s: &str) -> Self {
+ Self::Literal(s.to_string())
+ }
+
+ /// Compute a SHA256 checksum for a block of data.
+ pub fn sha256(data: &[u8]) -> Self {
+ let mut hasher = Sha256::new();
+ hasher.update(data);
+ let hash = hasher.finalize();
+ Self::Sha256(format!("{:x}", hash))
+ }
+
+ /// Compute a BLAKE2s checksum for a block of data.
+ pub fn blake2(data: &[u8]) -> Self {
+ let mut hasher = Blake2s256::new();
+ hasher.update(data);
+ let hash = hasher.finalize();
+ Self::Sha256(format!("{:x}", hash))
+ }
+
+ /// Serialize a label into a string representation.
+ pub fn serialize(&self) -> String {
+ match self {
+ Self::Literal(s) => format!("{}{}", LITERAL, s),
+ Self::Sha256(hash) => format!("{}{}", SHA256, hash),
+ Self::Blake2(hash) => format!("{}{}", BLAKE2, hash),
+ }
+ }
+
+ /// De-serialize a label from its string representation.
+ pub fn deserialize(s: &str) -> Result<Self, LabelError> {
+ if s.starts_with(LITERAL) {
+ Ok(Self::Literal(s[1..].to_string()))
+ } else if s.starts_with(SHA256) {
+ Ok(Self::Sha256(s[1..].to_string()))
+ } else {
+ Err(LabelError::UnknownType(s.to_string()))
+ }
+ }
+}
+
+/// Kinds of checksum labels.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum LabelChecksumKind {
+ /// Use a Blake2 checksum.
+ Blake2,
+
+ /// Use a SHA256 checksum.
+ Sha256,
+}
+
+impl LabelChecksumKind {
+ /// Parse a string into a label checksum kind.
+ pub fn from(s: &str) -> Result<Self, LabelError> {
+ if s == "sha256" {
+ Ok(Self::Sha256)
+ } else if s == "blake2" {
+ Ok(Self::Blake2)
+ } else {
+ Err(LabelError::UnknownType(s.to_string()))
+ }
+ }
+
+ /// Serialize a checksum kind into a string.
+ pub fn serialize(self) -> &'static str {
+ match self {
+ Self::Sha256 => "sha256",
+ Self::Blake2 => "blake2",
+ }
+ }
+}
+
+/// Possible errors from dealing with chunk labels.
+#[derive(Debug, thiserror::Error)]
+pub enum LabelError {
+ /// Serialized label didn't start with a known type prefix.
+ #[error("Unknown label: {0:?}")]
+ UnknownType(String),
+}
+
+#[cfg(test)]
+mod test {
+ use super::{Label, LabelChecksumKind};
+
+ #[test]
+ fn roundtrip_literal() {
+ let label = Label::literal("dummy data");
+ let serialized = label.serialize();
+ let de = Label::deserialize(&serialized).unwrap();
+ let seri2 = de.serialize();
+ assert_eq!(serialized, seri2);
+ }
+
+ #[test]
+ fn roundtrip_sha256() {
+ let label = Label::sha256(b"dummy data");
+ let serialized = label.serialize();
+ let de = Label::deserialize(&serialized).unwrap();
+ let seri2 = de.serialize();
+ assert_eq!(serialized, seri2);
+ }
+
+ #[test]
+ fn roundtrip_checksum_kind() {
+ for kind in [LabelChecksumKind::Sha256, LabelChecksumKind::Blake2] {
+ assert_eq!(LabelChecksumKind::from(kind.serialize()).unwrap(), kind);
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index a0a53c7..fbbea15 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,7 +9,6 @@ pub mod accumulated_time;
pub mod backup_progress;
pub mod backup_reason;
pub mod backup_run;
-pub mod checksummer;
pub mod chunk;
pub mod chunker;
pub mod chunkid;
@@ -29,6 +28,7 @@ pub mod genlist;
pub mod genmeta;
pub mod index;
pub mod indexedstore;
+pub mod label;
pub mod passwords;
pub mod performance;
pub mod policy;
diff --git a/src/server.rs b/src/server.rs
index 31a03fc..6b688d6 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -141,7 +141,7 @@ impl SearchHits {
#[cfg(test)]
mod test_search_hits {
use super::{ChunkMeta, SearchHits};
- use crate::checksummer::Checksum;
+ use crate::label::Label;
#[test]
fn no_search_hits() {
@@ -152,7 +152,7 @@ mod test_search_hits {
#[test]
fn one_search_hit() {
let id = "abc".parse().unwrap();
- let sum = Checksum::sha256_from_str_unchecked("123");
+ let sum = Label::sha256(b"123");
let meta = ChunkMeta::new(&sum);
let mut hits = SearchHits::default();
hits.insert(id, meta);