From 074ae1b24cf721ed3d86427795b34a4aefa1290c Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sat, 28 Nov 2020 17:03:04 +0200
Subject: feat: add SHA256 checksum of contents of regular files

---
 Cargo.toml         |  1 +
 src/lib.rs         | 29 +++++++++++++++++++++++++++++
 subplot/summain.py |  4 ++++
 summain.md         |  5 +++++
 4 files changed, 39 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index 3435e8c..73924e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ edition = "2018"
 
 [dependencies]
 anyhow = "1"
+digest = "0.9"
 serde = { version = "1", features = ["derive"] }
 serde_yaml = "0.8"
 sha2 = "0.9"
diff --git a/src/lib.rs b/src/lib.rs
index ccd652f..9f46f54 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,10 +32,15 @@
 //! ~~~
 
 use serde::Serialize;
+use sha2::{Digest, Sha256};
 use std::fs::symlink_metadata;
+use std::fs::File;
+use std::io::{BufReader, Read};
 use std::os::linux::fs::MetadataExt;
 use std::path::{Path, PathBuf};
 
+const BUF_SIZE: usize = 1024 * 1024;
+
 /// An entry in a file manifest.
 #[derive(Serialize, Debug)]
 pub struct ManifestEntry {
@@ -48,6 +53,7 @@ pub struct ManifestEntry {
     mtime_nsec: i64,
     nlink: u64,
     size: Option<u64>,
+    sha256: Option<String>,
 }
 
 impl ManifestEntry {
@@ -59,6 +65,11 @@ impl ManifestEntry {
     /// The structure can be serialized using serde.
     pub fn new(path: &Path) -> std::io::Result<Self> {
         let m = symlink_metadata(path)?;
+        let hash = if m.is_file() {
+            Some(file_checksum(path)?)
+        } else {
+            None
+        };
         Ok(Self {
             path: path.to_path_buf(),
             atime: m.st_atime(),
@@ -68,10 +79,28 @@ impl ManifestEntry {
             mtime_nsec: m.st_mtime_nsec(),
             nlink: m.st_nlink(),
             size: if m.is_dir() { None } else { Some(m.st_size()) },
+            sha256: hash,
         })
     }
 }
 
+fn file_checksum(path: &Path) -> std::io::Result<String> {
+    let mut hasher = Sha256::new();
+
+    let file = File::open(path)?;
+    let mut reader = BufReader::new(file);
+    let mut buf = vec![0; BUF_SIZE];
+    loop {
+        let n = reader.read(&mut buf)?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    let hash = hasher.finalize();
+    Ok(format!("{:x}", hash))
+}
+
 mod mode {
     use serde::{self, Serializer};
 
diff --git a/subplot/summain.py b/subplot/summain.py
index 3ea6188..98eecf2 100644
--- a/subplot/summain.py
+++ b/subplot/summain.py
@@ -1,3 +1,4 @@
+import logging
 import os
 
 
@@ -33,4 +34,7 @@ def output_matches_file(ctx, filename=None):
 
     actual = runcmd_get_stdout(ctx)
     expected = get_file(filename).decode("UTF-8")
+    logging.debug("output_matches:")
+    logging.debug(f" actual: {actual!r}")
+    logging.debug(f" expect: {expected!r}")
     assert_eq(actual, expected)
diff --git a/summain.md b/summain.md
index 7c2c5fa..e6a4d15 100644
--- a/summain.md
+++ b/summain.md
@@ -87,6 +87,7 @@ mtime: 456
 mtime_nsec: 0
 nlink: 2
 size: ~
+sha256: ~
 ```
 
 ## Writeable file
@@ -112,6 +113,7 @@ mtime: 22
 mtime_nsec: 0
 nlink: 1
 size: 0
+sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
 ```
 
 ## Read-only file
@@ -136,6 +138,7 @@ mtime: 44
 mtime_nsec: 0
 nlink: 1
 size: 0
+sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
 ```
 
 ## Two files sorted
@@ -163,6 +166,7 @@ mtime: 44
 mtime_nsec: 0
 nlink: 1
 size: 0
+sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
 ---
 path: bbb
 atime: 33
@@ -172,6 +176,7 @@ mtime: 44
 mtime_nsec: 0
 nlink: 1
 size: 0
+sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
 ```
 
 ---
-- 
cgit v1.2.1