summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2020-11-08 18:43:50 +0200
committerLars Wirzenius <liw@liw.fi>2020-11-08 18:50:32 +0200
commitebf6eb198ede5fb66320c22b3dee2188106c89ff (patch)
tree9d817019fc363f28bd1df523c67e0c191fddad51 /src
parent5cd1ea00ad5d60d5f54a23002bca6e8b40b9cd64 (diff)
downloadobnam2-ebf6eb198ede5fb66320c22b3dee2188106c89ff.tar.gz
feat(src/chunker.rs): add abstraction for chunking live data
This is very rudimentary for now
Diffstat (limited to 'src')
-rw-r--r--src/chunker.rs60
1 files changed, 60 insertions, 0 deletions
diff --git a/src/chunker.rs b/src/chunker.rs
new file mode 100644
index 0000000..f4ca74c
--- /dev/null
+++ b/src/chunker.rs
@@ -0,0 +1,60 @@
+use crate::chunk::DataChunk;
+use crate::chunkmeta::ChunkMeta;
+use sha2::{Digest, Sha256};
+use std::io::prelude::*;
+
+/// Splits the contents of an open file into chunks of a fixed maximum size.
+///
+/// Every chunk is returned together with a `ChunkMeta` created from the
+/// lowercase hexadecimal SHA-256 digest of the chunk's bytes.
+pub struct Chunker {
+    /// Maximum number of bytes placed in a single chunk.
+    chunk_size: usize,
+    /// Scratch buffer of `chunk_size` bytes, reused for every chunk read.
+    buf: Vec<u8>,
+    /// The file whose contents are being chunked.
+    handle: std::fs::File,
+}
+
+impl Chunker {
+    /// Creates a chunker that reads `handle` in pieces of at most
+    /// `chunk_size` bytes.
+    ///
+    /// The internal buffer is allocated once here, zero-filled, and reused
+    /// by every later call to [`Chunker::read_chunk`].
+    pub fn new(chunk_size: usize, handle: std::fs::File) -> Self {
+        Self {
+            chunk_size,
+            buf: vec![0; chunk_size],
+            handle,
+        }
+    }
+
+    /// Reads the next chunk from the file.
+    ///
+    /// Returns `Ok(Some((meta, chunk)))` with up to `chunk_size` bytes of
+    /// data, or `Ok(None)` when no bytes could be read (end of file, or a
+    /// `chunk_size` of zero). Only the last chunk of a file can be shorter
+    /// than `chunk_size`.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error reported while reading the file, except
+    /// `ErrorKind::Interrupted`, which is retried transparently.
+    pub fn read_chunk(&mut self) -> anyhow::Result<Option<(ChunkMeta, DataChunk)>> {
+        let mut used = 0;
+
+        // A single `read` may deliver fewer bytes than requested, so keep
+        // reading until the buffer is full or the file is exhausted.
+        while used < self.chunk_size {
+            match self.handle.read(&mut self.buf[used..]) {
+                Ok(0) => break,
+                Ok(n) => used += n,
+                // An interrupted read is not a failure: the `Read` contract
+                // says the operation should simply be retried.
+                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+                Err(e) => return Err(e.into()),
+            }
+        }
+
+        if used == 0 {
+            return Ok(None);
+        }
+
+        // Only the first `used` bytes are valid; the rest of the buffer is
+        // left over from earlier reads.
+        let buffer = &self.buf[..used];
+        let mut hasher = Sha256::new();
+        hasher.update(buffer);
+        let hash = format!("{:x}", hasher.finalize());
+        let meta = ChunkMeta::new(&hash);
+
+        // The chunk gets its own copy so the buffer can be reused.
+        let chunk = DataChunk::new(buffer.to_vec());
+        Ok(Some((meta, chunk)))
+    }
+}
+
+/// Iterating over a `Chunker` yields the chunks of its file one by one.
+impl Iterator for Chunker {
+    type Item = anyhow::Result<(ChunkMeta, DataChunk)>;
+
+    /// Returns the next chunk, `None` once `read_chunk` reports that no
+    /// more data is available, or `Some(Err(_))` if reading failed.
+    ///
+    /// An error does not end the iteration by itself: the following call
+    /// simply attempts another read.
+    fn next(&mut self) -> Option<Self::Item> {
+        // `transpose` maps `Ok(None)` to `None`, `Ok(Some(v))` to
+        // `Some(Ok(v))` and `Err(e)` to `Some(Err(e))`.
+        self.read_chunk().transpose()
+    }
+}