diff options
author | Lars Wirzenius <liw@liw.fi> | 2020-11-08 18:43:50 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2020-11-08 18:50:32 +0200 |
commit | ebf6eb198ede5fb66320c22b3dee2188106c89ff (patch) | |
tree | 9d817019fc363f28bd1df523c67e0c191fddad51 | |
parent | 5cd1ea00ad5d60d5f54a23002bca6e8b40b9cd64 (diff) | |
download | obnam2-ebf6eb198ede5fb66320c22b3dee2188106c89ff.tar.gz |
feat(src/chunker.rs): add abstraction for chunking live data
This is very rudimentary for now
-rw-r--r-- | src/chunker.rs | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/src/chunker.rs b/src/chunker.rs new file mode 100644 index 0000000..f4ca74c --- /dev/null +++ b/src/chunker.rs @@ -0,0 +1,60 @@ +use crate::chunk::DataChunk; +use crate::chunkmeta::ChunkMeta; +use sha2::{Digest, Sha256}; +use std::io::prelude::*; + +pub struct Chunker { + chunk_size: usize, + buf: Vec<u8>, + handle: std::fs::File, +} + +impl Chunker { + pub fn new(chunk_size: usize, handle: std::fs::File) -> Self { + let mut buf = vec![]; + buf.resize(chunk_size, 0); + Self { + chunk_size, + buf, + handle, + } + } + + pub fn read_chunk(&mut self) -> anyhow::Result<Option<(ChunkMeta, DataChunk)>> { + let mut used = 0; + + loop { + let n = self.handle.read(&mut self.buf.as_mut_slice()[used..])?; + used += n; + if n == 0 || used == self.chunk_size { + break; + } + } + + if used == 0 { + return Ok(None); + } + + let buffer = &self.buf.as_slice()[..used]; + let mut hasher = Sha256::new(); + hasher.update(buffer); + let hash = hasher.finalize(); + let hash = format!("{:x}", hash); + let meta = ChunkMeta::new(&hash); + + let chunk = DataChunk::new(buffer.to_vec()); + Ok(Some((meta, chunk))) + } +} + +impl Iterator for Chunker { + type Item = anyhow::Result<(ChunkMeta, DataChunk)>; + + fn next(&mut self) -> Option<anyhow::Result<(ChunkMeta, DataChunk)>> { + match self.read_chunk() { + Ok(None) => None, + Ok(Some((meta, chunk))) => Some(Ok((meta, chunk))), + Err(e) => Some(Err(e)), + } + } +} |