From 6ed6b1bc75b1995a7740ff28bc26a908b91f37c8 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 15 Sep 2021 18:12:50 +0300 Subject: refactor: define a Checksum type and use it where appropriate This will make it harder to compare, say, a SHA-256 and a SHA3, later, when we add more checksum types. Sponsored-by: author --- src/benchmark.rs | 3 ++- src/checksummer.rs | 32 +++++++++++++++++++++++++++----- src/chunk.rs | 4 ++-- src/chunker.rs | 4 ++-- src/chunkid.rs | 6 +++--- src/chunkmeta.rs | 19 ++++++++++++------- src/cipher.rs | 7 +++++-- src/index.rs | 23 ++++++++++++++++------- src/server.rs | 4 +++- 9 files changed, 72 insertions(+), 30 deletions(-) diff --git a/src/benchmark.rs b/src/benchmark.rs index 3c94f92..e5057ac 100644 --- a/src/benchmark.rs +++ b/src/benchmark.rs @@ -1,3 +1,4 @@ +use crate::checksummer::Checksum; use crate::chunk::DataChunk; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; @@ -15,7 +16,7 @@ impl ChunkGenerator { } impl Iterator for ChunkGenerator { - type Item = (ChunkId, String, DataChunk); + type Item = (ChunkId, Checksum, DataChunk); fn next(&mut self) -> Option<Self::Item> { if self.next >= self.goal { diff --git a/src/checksummer.rs b/src/checksummer.rs index 162c26b..18b8afb 100644 --- a/src/checksummer.rs +++ b/src/checksummer.rs @@ -1,8 +1,30 @@ use sha2::{Digest, Sha256}; +use std::fmt; -pub fn sha256(data: &[u8]) -> String { - let mut hasher = Sha256::new(); - hasher.update(data); - let hash = hasher.finalize(); - format!("{:x}", hash) +/// A checksum of some data. 
+#[derive(Debug, Clone)] +pub enum Checksum { + Sha256(String), +} + +impl Checksum { + pub fn sha256(data: &[u8]) -> Self { + let mut hasher = Sha256::new(); + hasher.update(data); + let hash = hasher.finalize(); + Self::Sha256(format!("{:x}", hash)) + } + + pub fn sha256_from_str_unchecked(hash: &str) -> Self { + Self::Sha256(hash.to_string()) + } +} + +impl fmt::Display for Checksum { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let hash = match self { + Self::Sha256(hash) => hash, + }; + write!(f, "{}", hash) + } } diff --git a/src/chunk.rs b/src/chunk.rs index 469185f..266d1a7 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,4 +1,4 @@ -use crate::checksummer::sha256; +use crate::checksummer::Checksum; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use serde::{Deserialize, Serialize}; @@ -78,7 +78,7 @@ impl GenerationChunk { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); - let sha = sha256(&bytes); + let sha = Checksum::sha256(&bytes); let meta = ChunkMeta::new_generation(&sha, ended); Ok(DataChunk::new(bytes, meta)) } diff --git a/src/chunker.rs b/src/chunker.rs index 763c148..f096365 100644 --- a/src/chunker.rs +++ b/src/chunker.rs @@ -1,4 +1,4 @@ -use crate::checksummer::sha256; +use crate::checksummer::Checksum; use crate::chunk::DataChunk; use crate::chunkmeta::ChunkMeta; use std::io::prelude::*; @@ -48,7 +48,7 @@ impl Chunker { } let buffer = &self.buf.as_slice()[..used]; - let hash = sha256(buffer); + let hash = Checksum::sha256(buffer); let meta = ChunkMeta::new(&hash); let chunk = DataChunk::new(buffer.to_vec(), meta); Ok(Some(chunk)) diff --git a/src/chunkid.rs b/src/chunkid.rs index 2f67d79..39e3ee1 100644 --- a/src/chunkid.rs +++ b/src/chunkid.rs @@ -1,4 +1,4 @@ -use crate::checksummer::sha256; +use crate::checksummer::Checksum; use rusqlite::types::ToSqlOutput; use rusqlite::ToSql; use serde::{Deserialize, Serialize}; @@ -46,8 
+46,8 @@ impl ChunkId { self.id.as_bytes() } - pub fn sha256(&self) -> String { - sha256(self.id.as_bytes()) + pub fn sha256(&self) -> Checksum { + Checksum::sha256(self.id.as_bytes()) } } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 73d9007..f8a8114 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -1,3 +1,4 @@ +use crate::checksummer::Checksum; use serde::{Deserialize, Serialize}; use std::default::Default; use std::str::FromStr; @@ -48,7 +49,7 @@ impl ChunkMeta { /// Create a new data chunk. /// /// Data chunks are not for generations. - pub fn new(sha256: &str) -> Self { + pub fn new(sha256: &Checksum) -> Self { ChunkMeta { sha256: sha256.to_string(), generation: None, @@ -57,7 +58,7 @@ impl ChunkMeta { } /// Create a new generation chunk. - pub fn new_generation(sha256: &str, ended: &str) -> Self { + pub fn new_generation(sha256: &Checksum, ended: &str) -> Self { ChunkMeta { sha256: sha256.to_string(), generation: Some(true), @@ -107,11 +108,12 @@ impl FromStr for ChunkMeta { #[cfg(test)] mod test { - use super::ChunkMeta; + use super::{Checksum, ChunkMeta}; #[test] fn new_creates_data_chunk() { - let meta = ChunkMeta::new("abcdef"); + let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let meta = ChunkMeta::new(&sum); assert!(!meta.is_generation()); assert_eq!(meta.ended(), None); assert_eq!(meta.sha256(), "abcdef"); @@ -119,7 +121,8 @@ mod test { #[test] fn new_generation_creates_generation_chunk() { - let meta = ChunkMeta::new_generation("abcdef", "2020-09-17T08:17:13+03:00"); + let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); assert!(meta.is_generation()); assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); assert_eq!(meta.sha256(), "abcdef"); @@ -146,7 +149,8 @@ mod test { #[test] fn generation_json_roundtrip() { - let meta = ChunkMeta::new_generation("abcdef", "2020-09-17T08:17:13+03:00"); + let sum = 
Checksum::sha256_from_str_unchecked("abcdef"); + let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); let json = serde_json::to_string(&meta).unwrap(); let meta2 = serde_json::from_str(&json).unwrap(); assert_eq!(meta, meta2); @@ -154,7 +158,8 @@ mod test { #[test] fn data_json_roundtrip() { - let meta = ChunkMeta::new("abcdef"); + let sum = Checksum::sha256_from_str_unchecked("abcdef"); + let meta = ChunkMeta::new(&sum); let json = meta.to_json_vec(); let meta2 = serde_json::from_slice(&json).unwrap(); assert_eq!(meta, meta2); diff --git a/src/cipher.rs b/src/cipher.rs index b8e02f2..04b2944 100644 --- a/src/cipher.rs +++ b/src/cipher.rs @@ -164,6 +164,7 @@ impl Nonce { #[cfg(test)] mod test { + use crate::checksummer::Checksum; use crate::chunk::DataChunk; use crate::chunkmeta::ChunkMeta; use crate::cipher::{CipherEngine, CipherError, CHUNK_V1, NONCE_SIZE}; @@ -171,7 +172,8 @@ mod test { #[test] fn metadata_as_aad() { - let meta = ChunkMeta::new("dummy-checksum"); + let sum = Checksum::sha256_from_str_unchecked("dummy-checksum"); + let meta = ChunkMeta::new(&sum); let meta_as_aad = meta.to_json_vec(); let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta); let pass = Passwords::new("secret"); @@ -183,7 +185,8 @@ mod test { #[test] fn round_trip() { - let meta = ChunkMeta::new("dummy-checksum"); + let sum = Checksum::sha256_from_str_unchecked("dummy-checksum"); + let meta = ChunkMeta::new(&sum); let chunk = DataChunk::new("hello".as_bytes().to_vec(), meta); let pass = Passwords::new("secret"); diff --git a/src/index.rs b/src/index.rs index e6bbb95..8cbe01e 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,3 +1,4 @@ +use crate::checksummer::Checksum; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use rusqlite::Connection; @@ -80,6 +81,8 @@ impl Index { #[cfg(test)] mod test { + use crate::checksummer::Checksum; + use super::{ChunkId, ChunkMeta, Index}; use std::path::Path; use tempfile::tempdir; @@ -91,7 +94,8 @@ mod test 
{ #[test] fn remembers_inserted() { let id: ChunkId = "id001".parse().unwrap(); - let meta = ChunkMeta::new("abc"); + let sum = Checksum::sha256_from_str_unchecked("abc"); + let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta.clone()).unwrap(); @@ -103,7 +107,8 @@ mod test { #[test] fn does_not_find_uninserted() { let id: ChunkId = "id001".parse().unwrap(); - let meta = ChunkMeta::new("abc"); + let sum = Checksum::sha256_from_str_unchecked("abc"); + let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id, meta).unwrap(); @@ -113,7 +118,8 @@ mod test { #[test] fn removes_inserted() { let id: ChunkId = "id001".parse().unwrap(); - let meta = ChunkMeta::new("abc"); + let sum = Checksum::sha256_from_str_unchecked("abc"); + let meta = ChunkMeta::new(&sum); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); @@ -132,7 +138,8 @@ mod test { #[test] fn remembers_generation() { let id: ChunkId = "id001".parse().unwrap(); - let meta = ChunkMeta::new_generation("abc", "timestamp"); + let sum = Checksum::sha256_from_str_unchecked("abc"); + let meta = ChunkMeta::new_generation(&sum, "timestamp"); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); @@ -142,7 +149,8 @@ mod test { #[test] fn removes_generation() { let id: ChunkId = "id001".parse().unwrap(); - let meta = ChunkMeta::new_generation("abc", "timestamp"); + let sum = Checksum::sha256_from_str_unchecked("abc"); + let meta = ChunkMeta::new_generation(&sum, "timestamp"); let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); @@ -152,7 +160,7 @@ mod test { } mod sql { - use super::IndexError; + use super::{Checksum, IndexError}; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use log::error; @@ 
-256,7 +264,8 @@ mod sql { } fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> { - let sha256: String = row.get(row.column_index("sha256")?)?; + let hash: String = row.get(row.column_index("sha256")?)?; + let sha256 = Checksum::sha256_from_str_unchecked(&hash); let generation: i32 = row.get(row.column_index("generation")?)?; let meta = if generation == 0 { ChunkMeta::new(&sha256) diff --git a/src/server.rs b/src/server.rs index 3b0584f..26f67bd 100644 --- a/src/server.rs +++ b/src/server.rs @@ -119,6 +119,7 @@ impl SearchHits { #[cfg(test)] mod test_search_hits { use super::{ChunkMeta, SearchHits}; + use crate::checksummer::Checksum; #[test] fn no_search_hits() { @@ -129,7 +130,8 @@ mod test_search_hits { #[test] fn one_search_hit() { let id = "abc".parse().unwrap(); - let meta = ChunkMeta::new("123"); + let sum = Checksum::sha256_from_str_unchecked("123"); + let meta = ChunkMeta::new(&sum); let mut hits = SearchHits::default(); hits.insert(id, meta); eprintln!("hits: {:?}", hits); -- cgit v1.2.1 From 97b9327ac7f73b3a2629cd90da20d1655bf9478e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 18 Sep 2021 17:51:50 +0300 Subject: docs: move abstract out from doc metadata It can be moved back when Subplot supports that again. Sponsored-by: author --- obnam.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/obnam.md b/obnam.md index 7da389a..02634f0 100644 --- a/obnam.md +++ b/obnam.md @@ -18,15 +18,16 @@ functions: - lib/runcmd.py classes: - json -abstract: | - Obnam is a backup system, consisting of a not very smart server for - storing chunks of backup data, and a client that splits the user's - data into chunks. They communicate via HTTP. - - This document describes the architecture and acceptance criteria for - Obnam, as well as how the acceptance criteria are verified. ... 
+# Abstract + +Obnam is a backup system, consisting of a not very smart server for +storing chunks of backup data, and a client that splits the user's +data into chunks. They communicate via HTTP. + +This document describes the architecture and acceptance criteria for +Obnam, as well as how the acceptance criteria are verified. # Introduction -- cgit v1.2.1