summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2021-05-24 08:55:14 +0300
committer Lars Wirzenius <liw@liw.fi> 2021-05-29 11:41:15 +0300
commit 6de230c382a4329df00bc11cc1ffb90390b13159 (patch)
tree d4f0668be0d5cd07ea32af2b0978696658532122
parent 566dd94d2e46c489b50d84a1fd24683460e5cfdc (diff)
download obnam2-6de230c382a4329df00bc11cc1ffb90390b13159.tar.gz
refactor: make metadata be part of datachunk
This makes it harder to lose the metadata for a chunk, or to use unrelated metadata and chunk. Also, soon I will refactor things for encrypting chunks, which will need metadata embedded in the encrypted chunk.

Sponsored-by: author
-rw-r--r-- src/benchmark.rs 6
-rw-r--r-- src/bin/benchmark-index.rs 7
-rw-r--r-- src/bin/benchmark-indexedstore.rs 8
-rw-r--r-- src/bin/benchmark-null.rs 2
-rw-r--r-- src/bin/benchmark-store.rs 4
-rw-r--r-- src/bin/obnam-server.rs 6
-rw-r--r-- src/chunk.rs 22
-rw-r--r-- src/chunker.rs 12
-rw-r--r-- src/client.rs 30
-rw-r--r-- src/indexedstore.rs 6
-rw-r--r-- src/store.rs 12
11 files changed, 62 insertions, 53 deletions
diff --git a/src/benchmark.rs b/src/benchmark.rs
index d214939..3c94f92 100644
--- a/src/benchmark.rs
+++ b/src/benchmark.rs
@@ -15,7 +15,7 @@ impl ChunkGenerator {
}
impl Iterator for ChunkGenerator {
- type Item = (ChunkId, String, ChunkMeta, DataChunk);
+ type Item = (ChunkId, String, DataChunk);
fn next(&mut self) -> Option<Self::Item> {
if self.next >= self.goal {
@@ -24,9 +24,9 @@ impl Iterator for ChunkGenerator {
let id = ChunkId::recreate(&format!("{}", self.next));
let checksum = id.sha256();
let meta = ChunkMeta::new(&checksum);
- let chunk = DataChunk::new(vec![]);
+ let chunk = DataChunk::new(vec![], meta);
self.next += 1;
- Some((id, checksum, meta, chunk))
+ Some((id, checksum, chunk))
}
}
}
diff --git a/src/bin/benchmark-index.rs b/src/bin/benchmark-index.rs
index 9baa327..b5a059c 100644
--- a/src/bin/benchmark-index.rs
+++ b/src/bin/benchmark-index.rs
@@ -60,7 +60,8 @@ fn create(chunks: &Path, num: u32) -> anyhow::Result<()> {
let mut index = Index::new(chunks)?;
let gen = ChunkGenerator::new(num);
- for (id, _, meta, _) in gen {
+ for (id, _, chunk) in gen {
+ let meta = (*chunk.meta()).clone();
index.insert_meta(id, meta)?;
}
@@ -82,8 +83,8 @@ fn lookup(index: &mut Index, num: u32) -> anyhow::Result<()> {
loop {
let gen = ChunkGenerator::new(num);
- for (_, _, meta, _) in gen {
- index.find_by_sha256(&meta.sha256())?;
+ for (_, _, chunk) in gen {
+ index.find_by_sha256(&chunk.meta().sha256())?;
done += 1;
if done >= num {
return Ok(());
diff --git a/src/bin/benchmark-indexedstore.rs b/src/bin/benchmark-indexedstore.rs
index acc3bd3..5cd3ff1 100644
--- a/src/bin/benchmark-indexedstore.rs
+++ b/src/bin/benchmark-indexedstore.rs
@@ -60,8 +60,8 @@ fn create(chunks: &Path, num: u32) -> anyhow::Result<()> {
let mut store = IndexedStore::new(chunks)?;
let gen = ChunkGenerator::new(num);
- for (_, _, meta, chunk) in gen {
- store.save(&meta, &chunk)?;
+ for (_, _, chunk) in gen {
+ store.save(&chunk)?;
}
Ok(())
@@ -82,8 +82,8 @@ fn lookup(index: &mut IndexedStore, num: u32) -> anyhow::Result<()> {
loop {
let gen = ChunkGenerator::new(num);
- for (_, _, meta, _) in gen {
- index.find_by_sha256(&meta.sha256())?;
+ for (_, _, chunk) in gen {
+ index.find_by_sha256(&chunk.meta().sha256())?;
done += 1;
if done >= num {
return Ok(());
diff --git a/src/bin/benchmark-null.rs b/src/bin/benchmark-null.rs
index 259a837..fc60a77 100644
--- a/src/bin/benchmark-null.rs
+++ b/src/bin/benchmark-null.rs
@@ -23,5 +23,5 @@ fn main() {
let opt = Opt::from_args();
let gen = ChunkGenerator::new(opt.num);
- for (_, _, _, _) in gen {}
+ for (_, _, _) in gen {}
}
diff --git a/src/bin/benchmark-store.rs b/src/bin/benchmark-store.rs
index f7c82b1..7896f9d 100644
--- a/src/bin/benchmark-store.rs
+++ b/src/bin/benchmark-store.rs
@@ -20,8 +20,8 @@ fn main() -> anyhow::Result<()> {
let gen = ChunkGenerator::new(opt.num);
let store = Store::new(&opt.chunks);
- for (id, _, meta, chunk) in gen {
- store.save(&id, &meta, &chunk)?;
+ for (id, _, chunk) in gen {
+ store.save(&id, &&chunk)?;
}
Ok(())
diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs
index 9a6540f..efee77e 100644
--- a/src/bin/obnam-server.rs
+++ b/src/bin/obnam-server.rs
@@ -109,9 +109,9 @@ pub async fn create_chunk(
}
};
- let chunk = DataChunk::new(data.to_vec());
+ let chunk = DataChunk::new(data.to_vec(), meta);
- let id = match store.save(&meta, &chunk) {
+ let id = match store.save(&chunk) {
Ok(id) => id,
Err(e) => {
error!("couldn't save: {}", e);
@@ -119,7 +119,7 @@ pub async fn create_chunk(
}
};
- info!("created chunk {}: {:?}", id, meta);
+ info!("created chunk {}", id);
Ok(ChunkResult::Created(id))
}
diff --git a/src/chunk.rs b/src/chunk.rs
index 0eed38a..50a2fc7 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -1,4 +1,6 @@
+use crate::checksummer::sha256;
use crate::chunkid::ChunkId;
+use crate::chunkmeta::ChunkMeta;
use serde::{Deserialize, Serialize};
use std::default::Default;
@@ -11,18 +13,24 @@ use std::default::Default;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataChunk {
data: Vec<u8>,
+ meta: ChunkMeta,
}
impl DataChunk {
/// Construct a new chunk.
- pub fn new(data: Vec<u8>) -> Self {
- Self { data }
+ pub fn new(data: Vec<u8>, meta: ChunkMeta) -> Self {
+ Self { data, meta }
}
/// Return a chunk's data.
pub fn data(&self) -> &[u8] {
&self.data
}
+
+ /// Return a chunk's metadata.
+ pub fn meta(&self) -> &ChunkMeta {
+ &self.meta
+ }
}
#[derive(Default, Debug, Serialize, Deserialize)]
@@ -69,8 +77,12 @@ impl GenerationChunk {
self.chunk_ids.iter()
}
- pub fn to_data_chunk(&self) -> GenerationChunkResult<DataChunk> {
- let json = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?;
- Ok(DataChunk::new(json.as_bytes().to_vec()))
+ pub fn to_data_chunk(&self, ended: &str) -> GenerationChunkResult<DataChunk> {
+ let json: String =
+ serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?;
+ let bytes = json.as_bytes().to_vec();
+ let sha = sha256(&bytes);
+ let meta = ChunkMeta::new_generation(&sha, ended);
+ Ok(DataChunk::new(bytes, meta))
}
}
diff --git a/src/chunker.rs b/src/chunker.rs
index eeeed8d..a7a39f1 100644
--- a/src/chunker.rs
+++ b/src/chunker.rs
@@ -31,7 +31,7 @@ impl Chunker {
}
}
- pub fn read_chunk(&mut self) -> ChunkerResult<Option<(ChunkMeta, DataChunk)>> {
+ pub fn read_chunk(&mut self) -> ChunkerResult<Option<DataChunk>> {
let mut used = 0;
loop {
@@ -52,18 +52,18 @@ impl Chunker {
let buffer = &self.buf.as_slice()[..used];
let hash = sha256(buffer);
let meta = ChunkMeta::new(&hash);
- let chunk = DataChunk::new(buffer.to_vec());
- Ok(Some((meta, chunk)))
+ let chunk = DataChunk::new(buffer.to_vec(), meta);
+ Ok(Some(chunk))
}
}
impl Iterator for Chunker {
- type Item = ChunkerResult<(ChunkMeta, DataChunk)>;
+ type Item = ChunkerResult<DataChunk>;
- fn next(&mut self) -> Option<ChunkerResult<(ChunkMeta, DataChunk)>> {
+ fn next(&mut self) -> Option<ChunkerResult<DataChunk>> {
match self.read_chunk() {
Ok(None) => None,
- Ok(Some((meta, chunk))) => Some(Ok((meta, chunk))),
+ Ok(Some(chunk)) => Some(Ok(chunk)),
Err(e) => Some(Err(e)),
}
}
diff --git a/src/client.rs b/src/client.rs
index 114574c..00f5bd7 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -107,10 +107,9 @@ impl BackupClient {
info!("upload SQLite {}", filename.display());
let ids = self.read_file(filename, size)?;
let gen = GenerationChunk::new(ids);
- let data = gen.to_data_chunk()?;
- let meta = ChunkMeta::new_generation(&sha256(data.data()), &current_timestamp());
- let gen_id = self.upload_gen_chunk(meta.clone(), gen)?;
- info!("uploaded generation {}, meta {:?}", gen_id, meta);
+ let data = gen.to_data_chunk(&current_timestamp())?;
+ let gen_id = self.upload_chunk(data)?;
+ info!("uploaded generation {}", gen_id);
Ok(gen_id)
}
@@ -127,24 +126,19 @@ impl BackupClient {
self.chunk_client.has_chunk(meta)
}
- pub fn upload_chunk(&self, meta: ChunkMeta, chunk: DataChunk) -> ClientResult<ChunkId> {
- self.chunk_client.upload_chunk(meta, chunk)
- }
-
- pub fn upload_gen_chunk(&self, meta: ChunkMeta, gen: GenerationChunk) -> ClientResult<ChunkId> {
- let data = gen.to_data_chunk()?;
- self.upload_chunk(meta, data)
+ pub fn upload_chunk(&self, chunk: DataChunk) -> ClientResult<ChunkId> {
+ self.chunk_client.upload_chunk(chunk)
}
pub fn upload_new_file_chunks(&self, chunker: Chunker) -> ClientResult<Vec<ChunkId>> {
let mut chunk_ids = vec![];
for item in chunker {
- let (meta, chunk) = item?;
- if let Some(chunk_id) = self.has_chunk(&meta)? {
+ let chunk = item?;
+ if let Some(chunk_id) = self.has_chunk(chunk.meta())? {
chunk_ids.push(chunk_id.clone());
info!("reusing existing chunk {}", chunk_id);
} else {
- let chunk_id = self.upload_chunk(meta, chunk)?;
+ let chunk_id = self.upload_chunk(chunk)?;
chunk_ids.push(chunk_id.clone());
info!("created new chunk {}", chunk_id);
}
@@ -246,11 +240,11 @@ impl ChunkClient {
Ok(has)
}
- pub fn upload_chunk(&self, meta: ChunkMeta, chunk: DataChunk) -> ClientResult<ChunkId> {
+ pub fn upload_chunk(&self, chunk: DataChunk) -> ClientResult<ChunkId> {
let res = self
.client
.post(&self.chunks_url())
- .header("chunk-meta", meta.to_json())
+ .header("chunk-meta", chunk.meta().to_json())
.body(chunk.data().to_vec())
.send()
.map_err(ClientError::ReqwestError)?;
@@ -262,7 +256,7 @@ impl ChunkClient {
} else {
return Err(ClientError::NoCreatedChunkId);
};
- info!("uploaded_chunk {} meta {:?}", chunk_id, meta);
+ info!("uploaded_chunk {}", chunk_id);
Ok(chunk_id)
}
@@ -335,7 +329,7 @@ impl ChunkClient {
return Err(err);
}
- let chunk: DataChunk = DataChunk::new(body);
+ let chunk: DataChunk = DataChunk::new(body, meta);
Ok(chunk)
}
diff --git a/src/indexedstore.rs b/src/indexedstore.rs
index 7f67a1f..982e2d9 100644
--- a/src/indexedstore.rs
+++ b/src/indexedstore.rs
@@ -40,10 +40,10 @@ impl IndexedStore {
Ok(Self { store, index })
}
- pub fn save(&mut self, meta: &ChunkMeta, chunk: &DataChunk) -> IndexedResult<ChunkId> {
+ pub fn save(&mut self, chunk: &DataChunk) -> IndexedResult<ChunkId> {
let id = ChunkId::new();
- self.store.save(&id, meta, chunk)?;
- self.insert_meta(&id, meta)?;
+ self.store.save(&id, chunk)?;
+ self.insert_meta(&id, chunk.meta())?;
Ok(id)
}
diff --git a/src/store.rs b/src/store.rs
index fca2c13..bccecc7 100644
--- a/src/store.rs
+++ b/src/store.rs
@@ -1,6 +1,5 @@
use crate::chunk::DataChunk;
use crate::chunkid::ChunkId;
-use crate::chunkmeta::ChunkMeta;
use std::path::{Path, PathBuf};
/// Store chunks, with metadata, persistently.
@@ -43,23 +42,26 @@ impl Store {
}
/// Save a chunk into a store.
- pub fn save(&self, id: &ChunkId, meta: &ChunkMeta, chunk: &DataChunk) -> StoreResult<()> {
+ pub fn save(&self, id: &ChunkId, chunk: &DataChunk) -> StoreResult<()> {
let (dir, metaname, dataname) = &self.filenames(id);
if !dir.exists() {
std::fs::create_dir_all(dir)?;
}
- std::fs::write(&metaname, meta.to_json())?;
+ std::fs::write(&metaname, chunk.meta().to_json())?;
std::fs::write(&dataname, chunk.data())?;
Ok(())
}
/// Load a chunk from a store.
pub fn load(&self, id: &ChunkId) -> StoreResult<DataChunk> {
- let (_, _, dataname) = &self.filenames(id);
+ let (_, metaname, dataname) = &self.filenames(id);
+ let meta = std::fs::read(&metaname)?;
+ let meta = serde_json::from_slice(&meta)?;
+
let data = std::fs::read(&dataname)?;
- let data = DataChunk::new(data);
+ let data = DataChunk::new(data, meta);
Ok(data)
}