From 6de230c382a4329df00bc11cc1ffb90390b13159 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 24 May 2021 08:55:14 +0300 Subject: refactor: make metadata be part of datachunk This makes it harder to lose the metadata for a chunk, or to use unrelated metadata and chunk. Also, soon I will refactor things for encrypting chunks, which will need metadata embedded in the encrypted chunk. Sponsored-by: author --- src/benchmark.rs | 6 +++--- src/bin/benchmark-index.rs | 7 ++++--- src/bin/benchmark-indexedstore.rs | 8 ++++---- src/bin/benchmark-null.rs | 2 +- src/bin/benchmark-store.rs | 4 ++-- src/bin/obnam-server.rs | 6 +++--- src/chunk.rs | 22 +++++++++++++++++----- src/chunker.rs | 12 ++++++------ src/client.rs | 30 ++++++++++++------------------ src/indexedstore.rs | 6 +++--- src/store.rs | 12 +++++++----- 11 files changed, 62 insertions(+), 53 deletions(-) diff --git a/src/benchmark.rs b/src/benchmark.rs index d214939..3c94f92 100644 --- a/src/benchmark.rs +++ b/src/benchmark.rs @@ -15,7 +15,7 @@ impl ChunkGenerator { } impl Iterator for ChunkGenerator { - type Item = (ChunkId, String, ChunkMeta, DataChunk); + type Item = (ChunkId, String, DataChunk); fn next(&mut self) -> Option { if self.next >= self.goal { @@ -24,9 +24,9 @@ impl Iterator for ChunkGenerator { let id = ChunkId::recreate(&format!("{}", self.next)); let checksum = id.sha256(); let meta = ChunkMeta::new(&checksum); - let chunk = DataChunk::new(vec![]); + let chunk = DataChunk::new(vec![], meta); self.next += 1; - Some((id, checksum, meta, chunk)) + Some((id, checksum, chunk)) } } } diff --git a/src/bin/benchmark-index.rs b/src/bin/benchmark-index.rs index 9baa327..b5a059c 100644 --- a/src/bin/benchmark-index.rs +++ b/src/bin/benchmark-index.rs @@ -60,7 +60,8 @@ fn create(chunks: &Path, num: u32) -> anyhow::Result<()> { let mut index = Index::new(chunks)?; let gen = ChunkGenerator::new(num); - for (id, _, meta, _) in gen { + for (id, _, chunk) in gen { + let meta = (*chunk.meta()).clone(); 
index.insert_meta(id, meta)?; } @@ -82,8 +83,8 @@ fn lookup(index: &mut Index, num: u32) -> anyhow::Result<()> { loop { let gen = ChunkGenerator::new(num); - for (_, _, meta, _) in gen { - index.find_by_sha256(&meta.sha256())?; + for (_, _, chunk) in gen { + index.find_by_sha256(&chunk.meta().sha256())?; done += 1; if done >= num { return Ok(()); diff --git a/src/bin/benchmark-indexedstore.rs b/src/bin/benchmark-indexedstore.rs index acc3bd3..5cd3ff1 100644 --- a/src/bin/benchmark-indexedstore.rs +++ b/src/bin/benchmark-indexedstore.rs @@ -60,8 +60,8 @@ fn create(chunks: &Path, num: u32) -> anyhow::Result<()> { let mut store = IndexedStore::new(chunks)?; let gen = ChunkGenerator::new(num); - for (_, _, meta, chunk) in gen { - store.save(&meta, &chunk)?; + for (_, _, chunk) in gen { + store.save(&chunk)?; } Ok(()) @@ -82,8 +82,8 @@ fn lookup(index: &mut IndexedStore, num: u32) -> anyhow::Result<()> { loop { let gen = ChunkGenerator::new(num); - for (_, _, meta, _) in gen { - index.find_by_sha256(&meta.sha256())?; + for (_, _, chunk) in gen { + index.find_by_sha256(&chunk.meta().sha256())?; done += 1; if done >= num { return Ok(()); diff --git a/src/bin/benchmark-null.rs b/src/bin/benchmark-null.rs index 259a837..fc60a77 100644 --- a/src/bin/benchmark-null.rs +++ b/src/bin/benchmark-null.rs @@ -23,5 +23,5 @@ fn main() { let opt = Opt::from_args(); let gen = ChunkGenerator::new(opt.num); - for (_, _, _, _) in gen {} + for (_, _, _) in gen {} } diff --git a/src/bin/benchmark-store.rs b/src/bin/benchmark-store.rs index f7c82b1..7896f9d 100644 --- a/src/bin/benchmark-store.rs +++ b/src/bin/benchmark-store.rs @@ -20,8 +20,8 @@ fn main() -> anyhow::Result<()> { let gen = ChunkGenerator::new(opt.num); let store = Store::new(&opt.chunks); - for (id, _, meta, chunk) in gen { - store.save(&id, &meta, &chunk)?; + for (id, _, chunk) in gen { + store.save(&id, &&chunk)?; } Ok(()) diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index 9a6540f..efee77e 100644 --- 
a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -109,9 +109,9 @@ pub async fn create_chunk( } }; - let chunk = DataChunk::new(data.to_vec()); + let chunk = DataChunk::new(data.to_vec(), meta); - let id = match store.save(&meta, &chunk) { + let id = match store.save(&chunk) { Ok(id) => id, Err(e) => { error!("couldn't save: {}", e); @@ -119,7 +119,7 @@ pub async fn create_chunk( } }; - info!("created chunk {}: {:?}", id, meta); + info!("created chunk {}", id); Ok(ChunkResult::Created(id)) } diff --git a/src/chunk.rs b/src/chunk.rs index 0eed38a..50a2fc7 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,4 +1,6 @@ +use crate::checksummer::sha256; use crate::chunkid::ChunkId; +use crate::chunkmeta::ChunkMeta; use serde::{Deserialize, Serialize}; use std::default::Default; @@ -11,18 +13,24 @@ use std::default::Default; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DataChunk { data: Vec, + meta: ChunkMeta, } impl DataChunk { /// Construct a new chunk. - pub fn new(data: Vec) -> Self { - Self { data } + pub fn new(data: Vec, meta: ChunkMeta) -> Self { + Self { data, meta } } /// Return a chunk's data. pub fn data(&self) -> &[u8] { &self.data } + + /// Return a chunk's metadata. 
+ pub fn meta(&self) -> &ChunkMeta { + &self.meta + } } #[derive(Default, Debug, Serialize, Deserialize)] @@ -69,8 +77,12 @@ impl GenerationChunk { self.chunk_ids.iter() } - pub fn to_data_chunk(&self) -> GenerationChunkResult { - let json = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; - Ok(DataChunk::new(json.as_bytes().to_vec())) + pub fn to_data_chunk(&self, ended: &str) -> GenerationChunkResult { + let json: String = + serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; + let bytes = json.as_bytes().to_vec(); + let sha = sha256(&bytes); + let meta = ChunkMeta::new_generation(&sha, ended); + Ok(DataChunk::new(bytes, meta)) } } diff --git a/src/chunker.rs b/src/chunker.rs index eeeed8d..a7a39f1 100644 --- a/src/chunker.rs +++ b/src/chunker.rs @@ -31,7 +31,7 @@ impl Chunker { } } - pub fn read_chunk(&mut self) -> ChunkerResult> { + pub fn read_chunk(&mut self) -> ChunkerResult> { let mut used = 0; loop { @@ -52,18 +52,18 @@ impl Chunker { let buffer = &self.buf.as_slice()[..used]; let hash = sha256(buffer); let meta = ChunkMeta::new(&hash); - let chunk = DataChunk::new(buffer.to_vec()); - Ok(Some((meta, chunk))) + let chunk = DataChunk::new(buffer.to_vec(), meta); + Ok(Some(chunk)) } } impl Iterator for Chunker { - type Item = ChunkerResult<(ChunkMeta, DataChunk)>; + type Item = ChunkerResult; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { match self.read_chunk() { Ok(None) => None, - Ok(Some((meta, chunk))) => Some(Ok((meta, chunk))), + Ok(Some(chunk)) => Some(Ok(chunk)), Err(e) => Some(Err(e)), } } diff --git a/src/client.rs b/src/client.rs index 114574c..00f5bd7 100644 --- a/src/client.rs +++ b/src/client.rs @@ -107,10 +107,9 @@ impl BackupClient { info!("upload SQLite {}", filename.display()); let ids = self.read_file(filename, size)?; let gen = GenerationChunk::new(ids); - let data = gen.to_data_chunk()?; - let meta = ChunkMeta::new_generation(&sha256(data.data()), &current_timestamp()); - let 
gen_id = self.upload_gen_chunk(meta.clone(), gen)?; - info!("uploaded generation {}, meta {:?}", gen_id, meta); + let data = gen.to_data_chunk(&current_timestamp())?; + let gen_id = self.upload_chunk(data)?; + info!("uploaded generation {}", gen_id); Ok(gen_id) } @@ -127,24 +126,19 @@ impl BackupClient { self.chunk_client.has_chunk(meta) } - pub fn upload_chunk(&self, meta: ChunkMeta, chunk: DataChunk) -> ClientResult { - self.chunk_client.upload_chunk(meta, chunk) - } - - pub fn upload_gen_chunk(&self, meta: ChunkMeta, gen: GenerationChunk) -> ClientResult { - let data = gen.to_data_chunk()?; - self.upload_chunk(meta, data) + pub fn upload_chunk(&self, chunk: DataChunk) -> ClientResult { + self.chunk_client.upload_chunk(chunk) } pub fn upload_new_file_chunks(&self, chunker: Chunker) -> ClientResult> { let mut chunk_ids = vec![]; for item in chunker { - let (meta, chunk) = item?; - if let Some(chunk_id) = self.has_chunk(&meta)? { + let chunk = item?; + if let Some(chunk_id) = self.has_chunk(chunk.meta())? 
{ chunk_ids.push(chunk_id.clone()); info!("reusing existing chunk {}", chunk_id); } else { - let chunk_id = self.upload_chunk(meta, chunk)?; + let chunk_id = self.upload_chunk(chunk)?; chunk_ids.push(chunk_id.clone()); info!("created new chunk {}", chunk_id); } @@ -246,11 +240,11 @@ impl ChunkClient { Ok(has) } - pub fn upload_chunk(&self, meta: ChunkMeta, chunk: DataChunk) -> ClientResult { + pub fn upload_chunk(&self, chunk: DataChunk) -> ClientResult { let res = self .client .post(&self.chunks_url()) - .header("chunk-meta", meta.to_json()) + .header("chunk-meta", chunk.meta().to_json()) .body(chunk.data().to_vec()) .send() .map_err(ClientError::ReqwestError)?; @@ -262,7 +256,7 @@ impl ChunkClient { } else { return Err(ClientError::NoCreatedChunkId); }; - info!("uploaded_chunk {} meta {:?}", chunk_id, meta); + info!("uploaded_chunk {}", chunk_id); Ok(chunk_id) } @@ -335,7 +329,7 @@ impl ChunkClient { return Err(err); } - let chunk: DataChunk = DataChunk::new(body); + let chunk: DataChunk = DataChunk::new(body, meta); Ok(chunk) } diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 7f67a1f..982e2d9 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -40,10 +40,10 @@ impl IndexedStore { Ok(Self { store, index }) } - pub fn save(&mut self, meta: &ChunkMeta, chunk: &DataChunk) -> IndexedResult { + pub fn save(&mut self, chunk: &DataChunk) -> IndexedResult { let id = ChunkId::new(); - self.store.save(&id, meta, chunk)?; - self.insert_meta(&id, meta)?; + self.store.save(&id, chunk)?; + self.insert_meta(&id, chunk.meta())?; Ok(id) } diff --git a/src/store.rs b/src/store.rs index fca2c13..bccecc7 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,6 +1,5 @@ use crate::chunk::DataChunk; use crate::chunkid::ChunkId; -use crate::chunkmeta::ChunkMeta; use std::path::{Path, PathBuf}; /// Store chunks, with metadata, persistently. @@ -43,23 +42,26 @@ impl Store { } /// Save a chunk into a store. 
- pub fn save(&self, id: &ChunkId, meta: &ChunkMeta, chunk: &DataChunk) -> StoreResult<()> { + pub fn save(&self, id: &ChunkId, chunk: &DataChunk) -> StoreResult<()> { let (dir, metaname, dataname) = &self.filenames(id); if !dir.exists() { std::fs::create_dir_all(dir)?; } - std::fs::write(&metaname, meta.to_json())?; + std::fs::write(&metaname, chunk.meta().to_json())?; std::fs::write(&dataname, chunk.data())?; Ok(()) } /// Load a chunk from a store. pub fn load(&self, id: &ChunkId) -> StoreResult { - let (_, _, dataname) = &self.filenames(id); + let (_, metaname, dataname) = &self.filenames(id); + let meta = std::fs::read(&metaname)?; + let meta = serde_json::from_slice(&meta)?; + let data = std::fs::read(&dataname)?; - let data = DataChunk::new(data); + let data = DataChunk::new(data, meta); Ok(data) } -- cgit v1.2.1