diff options
author | Lars Wirzenius <liw@liw.fi> | 2021-04-23 08:01:02 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2021-04-28 11:31:40 +0300 |
commit | fe5b650897f6715d40a29e72d821caf812f9edaa (patch) | |
tree | 44757e6c3139948b1738195a15eabf4f5c7ca828 | |
parent | a21cd46fc76ca7cb1c87a218c7c561d668a64125 (diff) | |
download | summain-rs-fe5b650897f6715d40a29e72d821caf812f9edaa.tar.gz |
refactor: use async for concurrency
Normal tokio async tasks for collecting metadata, blocking tasks for
computing checksums, so that all available CPU can be used for that.
-rw-r--r-- | Cargo.lock | 186 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | src/bin/summain.rs | 52 | ||||
-rw-r--r-- | src/lib.rs | 28 |
4 files changed, 252 insertions, 16 deletions
@@ -27,6 +27,12 @@ dependencies = [ ] [[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -42,6 +48,12 @@ dependencies = [ ] [[package]] +name = "bytes" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" + +[[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -112,6 +124,15 @@ dependencies = [ ] [[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if", +] + +[[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -130,12 +151,114 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" [[package]] +name = "lock_api" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3c91c24eae6777794bb1997ad98bbb87daf92890acab859f7eaa4320333176" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "mio" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" +dependencies = [ + "libc", + "log", + "miow", + "ntapi", + "winapi", +] + +[[package]] +name = "miow" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" +dependencies = [ + "winapi", +] + +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" + +[[package]] name = "opaque-debug" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0e1f259c92177c30a4c9d177246edd0a3568b25756a977d0632cf8fa37e905" + +[[package]] name = "proc-macro-error" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -178,6 +301,21 @@ dependencies = [ ] [[package]] +name = "redox_syscall" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] name = "serde" version = "1.0.123" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -223,6 +361,21 @@ dependencies = [ ] [[package]] +name = "signal-hook-registry" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" +dependencies = [ + "libc", +] + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -258,10 +411,12 @@ version = "0.26.0" dependencies = [ "anyhow", "digest", + "num_cpus", "serde", "serde_yaml", "sha2", "structopt", + "tokio", "unix_mode", ] @@ -286,6 +441,37 @@ dependencies = [ ] [[package]] +name = "tokio" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5" +dependencies = [ + "autocfg", + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "once_cell", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "tokio-macros", + "winapi", +] + +[[package]] +name = "tokio-macros" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf7b11a536f46a809a8a9f0bb4237020f70ecbf115b842360afb127ea2fda57" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] name = "typenum" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -13,8 +13,10 @@ edition = "2018" [dependencies] anyhow = "1" digest = "0.9" +num_cpus = "1" serde = { version = "1", features = ["derive"] } serde_yaml = "0.8" sha2 = "0.9" structopt = "0.3" +tokio = { version = "1", features = ["full"] } unix_mode = "0.1" diff --git a/src/bin/summain.rs b/src/bin/summain.rs index 4336ebd..5031006 100644 --- a/src/bin/summain.rs +++ b/src/bin/summain.rs @@ -1,17 +1,51 @@ use anyhow::Context; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use structopt::StructOpt; use summain::ManifestEntry; -fn main() -> anyhow::Result<()> { +#[tokio::main] +async fn main() -> anyhow::Result<()> { let mut opt = Opt::from_args(); opt.pathnames[..].sort(); - let v: Vec<anyhow::Result<ManifestEntry>> = - opt.pathnames.iter().map(|p| manifest(&p)).collect(); - for m in v { - let m = m?; + + // Get metadata about all files, but don't compute checksum yet. + // + // This all runs in async worker threads, since it's theoretically + // I/O heavy and benefits from async task context switching. We + // create a task for each named file, since tasks are very cheap. + // We keep the JoinHandle for each task in a vector: there's not + // going to be so many handles that the vector becomes impossibly + // large: Linux only allows 128 KiB of command line arguments. + + let mut handles = vec![]; + for filename in opt.pathnames.iter().cloned() { + handles.push(tokio::spawn(async move { manifest(filename) })); + } + + // Compute checksums for regular files. + // + // This runs in blocking threads, since it's CPU heavy. We create + // another vector of JoinHandles. Again, it won't become too large. + + let mut sumhandles = vec![]; + for h in handles { + let mut m: ManifestEntry = h.await?.await?; + let h = tokio::task::spawn_blocking(move || match m.compute_checksum() { + Err(e) => Err(e), + Ok(_) => Ok(m), + }); + sumhandles.push(h) + } + + // Wait for checksums to be available and print manifest. + // + // Note how this iterates over the results in the right order. + + for h in sumhandles { + let m: ManifestEntry = h.await??; print!("{}", serde_yaml::to_string(&m)?); } + Ok(()) } @@ -21,6 +55,8 @@ struct Opt { pathnames: Vec<PathBuf>, } -fn manifest(path: &Path) -> anyhow::Result<ManifestEntry> { - ManifestEntry::new(path).with_context(|| format!("{}", path.display())) +async fn manifest(path: PathBuf) -> anyhow::Result<ManifestEntry> { + ManifestEntry::new(&path) + .await + .with_context(|| format!("{}", path.display())) } @@ -42,6 +42,14 @@ const BUF_SIZE: usize = 1024 * 1024; /// An entry in a file manifest. #[derive(Serialize, Debug)] pub struct ManifestEntry { + #[serde(skip)] + is_regular: bool, + + // Store the original name in a hidden field, for compute_checksum. + #[serde(skip)] + filename: PathBuf, + + // We store pathname as a string so that we can handle non-UTF8 names. path: String, #[serde(with = "mode")] mode: u32, @@ -60,29 +68,33 @@ impl ManifestEntry { /// caller. This function doesn't query the system for it. /// /// The structure can be serialized using serde. - pub fn new(path: &Path) -> std::io::Result<Self> { + pub async fn new(path: &Path) -> std::io::Result<Self> { let m = symlink_metadata(path)?; - let hash = if m.is_file() { - Some(file_checksum(path)?) - } else { - None - }; let target = if m.file_type().is_symlink() { Some(read_link(path)?) } else { None }; Ok(Self { + is_regular: m.is_file(), + filename: path.to_path_buf(), path: path.to_string_lossy().into_owned(), mode: m.st_mode(), mtime: m.st_mtime(), mtime_nsec: m.st_mtime_nsec(), nlink: m.st_nlink(), size: if m.is_dir() { None } else { Some(m.st_size()) }, - sha256: hash, + sha256: None, target, }) } + + pub fn compute_checksum(&mut self) -> std::io::Result<()> { + if self.is_regular { + self.sha256 = Some(file_checksum(&self.filename)?); + } + Ok(()) + } } fn file_checksum(path: &Path) -> std::io::Result<String> { @@ -90,8 +102,8 @@ fn file_checksum(path: &Path) -> std::io::Result<String> { let file = File::open(path)?; let mut reader = BufReader::new(file); - let mut buf = vec![0; BUF_SIZE]; loop { + let mut buf = vec![0; BUF_SIZE]; let n = reader.read(&mut buf)?; if n == 0 { break; |