diff options
author | Lars Wirzenius <liw@liw.fi> | 2021-04-23 08:01:02 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2021-04-28 11:31:40 +0300 |
commit | fe5b650897f6715d40a29e72d821caf812f9edaa (patch) | |
tree | 44757e6c3139948b1738195a15eabf4f5c7ca828 /src | |
parent | a21cd46fc76ca7cb1c87a218c7c561d668a64125 (diff) | |
download | summain-rs-fe5b650897f6715d40a29e72d821caf812f9edaa.tar.gz |
refactor: use async for concurrency
Normal tokio async tasks for collecting metadata, blocking tasks for
computing checksums, so that all available CPU can be used for that.
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/summain.rs | 52 | ||||
-rw-r--r-- | src/lib.rs | 28 |
2 files changed, 64 insertions, 16 deletions
diff --git a/src/bin/summain.rs b/src/bin/summain.rs index 4336ebd..5031006 100644 --- a/src/bin/summain.rs +++ b/src/bin/summain.rs @@ -1,17 +1,51 @@ use anyhow::Context; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use structopt::StructOpt; use summain::ManifestEntry; -fn main() -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { let mut opt = Opt::from_args(); opt.pathnames[..].sort(); - let v: Vec<anyhow::Result<ManifestEntry>> = - opt.pathnames.iter().map(|p| manifest(&p)).collect(); - for m in v { - let m = m?; + + // Get metadata about all files, but don't compute checksum yet. + // + // This all runs in async worker threads, since it's theoretically + // I/O heavy and benefits from async task context switching. We + // create a task for each named file, since tasks are very cheap. + // We keep the JoinHandle for each task in a vector: there's not + // going to be so many handles that the vector becomes impossibly + // large: Linux only allows 128 KiB of command line arguments. + + let mut handles = vec![]; + for filename in opt.pathnames.iter().cloned() { + handles.push(tokio::spawn(async move { manifest(filename) })); + } + + // Compute checksums for regular files. + // + // This runs in blocking threads, since it's CPU heavy. We create + // another vector of JoinHandles. Again, it won't become too large. + + let mut sumhandles = vec![]; + for h in handles { + let mut m: ManifestEntry = h.await?.await?; + let h = tokio::task::spawn_blocking(move || match m.compute_checksum() { + Err(e) => Err(e), + Ok(_) => Ok(m), + }); + sumhandles.push(h) + } + + // Wait for checksums to be available and print manifest. + // + // Note how this iterates over the results in the right order. + + for h in sumhandles { + let m: ManifestEntry = h.await??; print!("{}", serde_yaml::to_string(&m)?); } + Ok(()) } @@ -21,6 +55,8 @@ struct Opt { pathnames: Vec<PathBuf>, } -fn manifest(path: &Path) -> anyhow::Result<ManifestEntry> { - ManifestEntry::new(path).with_context(|| format!("{}", path.display())) +async fn manifest(path: PathBuf) -> anyhow::Result<ManifestEntry> { + ManifestEntry::new(&path) + .await + .with_context(|| format!("{}", path.display())) } @@ -42,6 +42,14 @@ const BUF_SIZE: usize = 1024 * 1024; /// An entry in a file manifest. #[derive(Serialize, Debug)] pub struct ManifestEntry { + #[serde(skip)] + is_regular: bool, + + // Store the original name in a hidden field, for compute_checksum. + #[serde(skip)] + filename: PathBuf, + + // We store pathname as a string so that we can handle non-UTF8 names. path: String, #[serde(with = "mode")] mode: u32, @@ -60,29 +68,33 @@ impl ManifestEntry { /// caller. This function doesn't query the system for it. /// /// The structure can be serialized using serde. - pub fn new(path: &Path) -> std::io::Result<Self> { + pub async fn new(path: &Path) -> std::io::Result<Self> { let m = symlink_metadata(path)?; - let hash = if m.is_file() { - Some(file_checksum(path)?) - } else { - None - }; let target = if m.file_type().is_symlink() { Some(read_link(path)?) } else { None }; Ok(Self { + is_regular: m.is_file(), + filename: path.to_path_buf(), path: path.to_string_lossy().into_owned(), mode: m.st_mode(), mtime: m.st_mtime(), mtime_nsec: m.st_mtime_nsec(), nlink: m.st_nlink(), size: if m.is_dir() { None } else { Some(m.st_size()) }, - sha256: hash, + sha256: None, target, }) } + + pub fn compute_checksum(&mut self) -> std::io::Result<()> { + if self.is_regular { + self.sha256 = Some(file_checksum(&self.filename)?); + } + Ok(()) + } } fn file_checksum(path: &Path) -> std::io::Result<String> { @@ -90,8 +102,8 @@ fn file_checksum(path: &Path) -> std::io::Result<String> { let file = File::open(path)?; let mut reader = BufReader::new(file); - let mut buf = vec![0; BUF_SIZE]; loop { + let mut buf = vec![0; BUF_SIZE]; let n = reader.read(&mut buf)?; if n == 0 { break; |