summaryrefslogtreecommitdiff
path: root/src/chunker.rs
blob: 239423005bc57cdf73d9b0ad9b7510d6d65f39de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
//! Split file data into chunks.

use crate::chunk::DataChunk;
use crate::chunkmeta::ChunkMeta;
use crate::label::Label;
use std::io::prelude::*;
use std::path::{Path, PathBuf};

/// Iterator over chunks in a file.
///
/// Each call to `next` reads up to `chunk_size` bytes from `handle`
/// and yields them as one [`DataChunk`].
pub struct FileChunks {
    /// Number of bytes at which a chunk is considered full.
    chunk_size: usize,
    /// Scratch buffer the file is read into; sized to `chunk_size` by `new`.
    buf: Vec<u8>,
    /// Name of the file being read; only used when reporting read errors.
    filename: PathBuf,
    /// Open file the chunk data is read from.
    handle: std::fs::File,
}

/// Possible errors from data chunking.
#[derive(Debug, thiserror::Error)]
pub enum ChunkerError {
    /// Error reading from a file.
    ///
    /// Carries the name of the file that was being read and the
    /// underlying I/O error returned by the read call.
    #[error("failed to read file {0}: {1}")]
    FileRead(PathBuf, std::io::Error),
}

impl FileChunks {
    /// Create a new iterator over the chunks of an already opened file.
    ///
    /// * `chunk_size` is the maximum number of bytes in a chunk. Every
    ///   chunk except possibly the last one is exactly this long.
    /// * `handle` is the open file the data is read from.
    /// * `filename` is the name of the file; it is only used in error
    ///   values.
    pub fn new(chunk_size: usize, handle: std::fs::File, filename: &Path) -> Self {
        Self {
            chunk_size,
            // Allocate the read buffer once; it is reused for every chunk.
            buf: vec![0; chunk_size],
            handle,
            filename: filename.to_path_buf(),
        }
    }

    /// Read the next chunk from the file.
    ///
    /// Returns `Ok(None)` when no more data could be read, and
    /// `Ok(Some(chunk))` otherwise. The chunk is labelled with the
    /// SHA-256 of its data.
    ///
    /// # Errors
    ///
    /// Returns [`ChunkerError::FileRead`] if reading from the file
    /// fails. An interrupted read is retried rather than reported.
    fn read_chunk(&mut self) -> Result<Option<DataChunk>, ChunkerError> {
        let mut used = 0;

        // A single `read` may return fewer bytes than requested, so keep
        // reading until the buffer is full or the file runs out of data.
        while used < self.chunk_size {
            match self.handle.read(&mut self.buf[used..]) {
                // A zero-length read means end of file.
                Ok(0) => break,
                Ok(n) => used += n,
                // `Interrupted` is documented as non-fatal: retry the read
                // instead of failing the whole chunking operation.
                Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(err) => return Err(ChunkerError::FileRead(self.filename.clone(), err)),
            }
        }

        if used == 0 {
            return Ok(None);
        }

        let data = &self.buf[..used];
        let hash = Label::sha256(data);
        let meta = ChunkMeta::new(&hash);
        // Copy the data out so the buffer can be reused for the next chunk.
        Ok(Some(DataChunk::new(data.to_vec(), meta)))
    }
}

impl Iterator for FileChunks {
    type Item = Result<DataChunk, ChunkerError>;

    /// Yield the next chunk of the file.
    ///
    /// Returns `None` once no more data can be read, `Some(Ok(chunk))`
    /// for each chunk read, and `Some(Err(_))` if reading fails.
    fn next(&mut self) -> Option<Self::Item> {
        // `Result<Option<T>, E>` -> `Option<Result<T, E>>`.
        self.read_chunk().transpose()
    }
}