summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-04-28 08:35:09 +0000
committerLars Wirzenius <liw@liw.fi>2021-04-28 08:35:09 +0000
commitcc5baf3cf6fefe3433e5310ff1aa339f3ee67443 (patch)
tree44757e6c3139948b1738195a15eabf4f5c7ca828
parent5789af180a7f65d3fedd93be49cd8f767912eab7 (diff)
parentfe5b650897f6715d40a29e72d821caf812f9edaa (diff)
downloadsummain-rs-cc5baf3cf6fefe3433e5310ff1aa339f3ee67443.tar.gz
Merge branch 'async' into 'main'
refactor: use async for concurrency See merge request larswirzenius/summain!19
-rw-r--r--Cargo.lock237
-rw-r--r--Cargo.toml3
-rw-r--r--src/bin/summain.rs53
-rw-r--r--src/lib.rs28
4 files changed, 220 insertions, 101 deletions
diff --git a/Cargo.lock b/Cargo.lock
index d3a5a8d..a8b2a30 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -48,6 +48,12 @@ dependencies = [
]
[[package]]
+name = "bytes"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040"
+
+[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -69,64 +75,12 @@ dependencies = [
]
[[package]]
-name = "const_fn"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6"
-
-[[package]]
name = "cpuid-bool"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
[[package]]
-name = "crossbeam-channel"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
-dependencies = [
- "cfg-if",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-deque"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
-dependencies = [
- "cfg-if",
- "crossbeam-epoch",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-epoch"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
-dependencies = [
- "cfg-if",
- "const_fn",
- "crossbeam-utils",
- "lazy_static",
- "memoffset",
- "scopeguard",
-]
-
-[[package]]
-name = "crossbeam-utils"
-version = "0.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
-dependencies = [
- "autocfg",
- "cfg-if",
- "lazy_static",
-]
-
-[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -142,12 +96,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d7ed2934d741c6b37e33e3832298e8850b53fd2d2bea03873375596c7cea4e"
[[package]]
-name = "either"
-version = "1.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
-
-[[package]]
name = "generic-array"
version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -176,6 +124,15 @@ dependencies = [
]
[[package]]
+name = "instant"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -194,12 +151,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
[[package]]
-name = "memoffset"
-version = "0.6.1"
+name = "lock_api"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87"
+checksum = "5a3c91c24eae6777794bb1997ad98bbb87daf92890acab859f7eaa4320333176"
dependencies = [
- "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "memchr"
+version = "2.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
+
+[[package]]
+name = "mio"
+version = "0.7.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
+dependencies = [
+ "libc",
+ "log",
+ "miow",
+ "ntapi",
+ "winapi",
+]
+
+[[package]]
+name = "miow"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "ntapi"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
+dependencies = [
+ "winapi",
]
[[package]]
@@ -213,12 +216,49 @@ dependencies = [
]
[[package]]
+name = "once_cell"
+version = "1.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
+
+[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
+name = "parking_lot"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
+dependencies = [
+ "instant",
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
+dependencies = [
+ "cfg-if",
+ "instant",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "winapi",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc0e1f259c92177c30a4c9d177246edd0a3568b25756a977d0632cf8fa37e905"
+
+[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -261,28 +301,12 @@ dependencies = [
]
[[package]]
-name = "rayon"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674"
-dependencies = [
- "autocfg",
- "crossbeam-deque",
- "either",
- "rayon-core",
-]
-
-[[package]]
-name = "rayon-core"
-version = "1.9.0"
+name = "redox_syscall"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
+checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-utils",
- "lazy_static",
- "num_cpus",
+ "bitflags",
]
[[package]]
@@ -337,6 +361,21 @@ dependencies = [
]
[[package]]
+name = "signal-hook-registry"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
+
+[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -372,11 +411,12 @@ version = "0.26.0"
dependencies = [
"anyhow",
"digest",
- "rayon",
+ "num_cpus",
"serde",
"serde_yaml",
"sha2",
"structopt",
+ "tokio",
"unix_mode",
]
@@ -401,6 +441,37 @@ dependencies = [
]
[[package]]
+name = "tokio"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
+dependencies = [
+ "autocfg",
+ "bytes",
+ "libc",
+ "memchr",
+ "mio",
+ "num_cpus",
+ "once_cell",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "tokio-macros",
+ "winapi",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "caf7b11a536f46a809a8a9f0bb4237020f70ecbf115b842360afb127ea2fda57"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
name = "typenum"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index e68e266..444263f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,9 +13,10 @@ edition = "2018"
[dependencies]
anyhow = "1"
digest = "0.9"
-rayon = "1.5"
+num_cpus = "1"
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.8"
sha2 = "0.9"
structopt = "0.3"
+tokio = { version = "1", features = ["full"] }
unix_mode = "0.1"
diff --git a/src/bin/summain.rs b/src/bin/summain.rs
index a2e2b7a..5031006 100644
--- a/src/bin/summain.rs
+++ b/src/bin/summain.rs
@@ -1,18 +1,51 @@
use anyhow::Context;
-use rayon::prelude::*;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
use structopt::StructOpt;
use summain::ManifestEntry;
-fn main() -> anyhow::Result<()> {
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
let mut opt = Opt::from_args();
opt.pathnames[..].sort();
- let v: Vec<anyhow::Result<ManifestEntry>> =
- opt.pathnames.par_iter().map(|p| manifest(&p)).collect();
- for m in v {
- let m = m?;
+
+ // Get metadata about all files, but don't compute checksum yet.
+ //
+ // This all runs in async worker threads, since it's theoretically
+ // I/O heavy and benefits from async task context switching. We
+ // create a task for each named file, since tasks are very cheap.
+ // We keep the JoinHandle for each task in a vector: there's not
+ // going to be so many handles that the vector becomes impossibly
+ // large: Linux only allows 128 KiB of command line arguments.
+
+ let mut handles = vec![];
+ for filename in opt.pathnames.iter().cloned() {
+ handles.push(tokio::spawn(async move { manifest(filename) }));
+ }
+
+ // Compute checksums for regular files.
+ //
+ // This runs in blocking threads, since it's CPU heavy. We create
+ // another vector of JoinHandles. Again, it won't become too large.
+
+ let mut sumhandles = vec![];
+ for h in handles {
+ let mut m: ManifestEntry = h.await?.await?;
+ let h = tokio::task::spawn_blocking(move || match m.compute_checksum() {
+ Err(e) => Err(e),
+ Ok(_) => Ok(m),
+ });
+ sumhandles.push(h)
+ }
+
+ // Wait for checksums to be available and print manifest.
+ //
+ // Note how this iterates over the results in the right order.
+
+ for h in sumhandles {
+ let m: ManifestEntry = h.await??;
print!("{}", serde_yaml::to_string(&m)?);
}
+
Ok(())
}
@@ -22,6 +55,8 @@ struct Opt {
pathnames: Vec<PathBuf>,
}
-fn manifest(path: &Path) -> anyhow::Result<ManifestEntry> {
- ManifestEntry::new(path).with_context(|| format!("{}", path.display()))
+async fn manifest(path: PathBuf) -> anyhow::Result<ManifestEntry> {
+ ManifestEntry::new(&path)
+ .await
+ .with_context(|| format!("{}", path.display()))
}
diff --git a/src/lib.rs b/src/lib.rs
index 3c90156..7075477 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -42,6 +42,14 @@ const BUF_SIZE: usize = 1024 * 1024;
/// An entry in a file manifest.
#[derive(Serialize, Debug)]
pub struct ManifestEntry {
+ #[serde(skip)]
+ is_regular: bool,
+
+ // Store the original name in a hidden field, for compute_checksum.
+ #[serde(skip)]
+ filename: PathBuf,
+
+ // We store pathname as a string so that we can handle non-UTF8 names.
path: String,
#[serde(with = "mode")]
mode: u32,
@@ -60,29 +68,33 @@ impl ManifestEntry {
/// caller. This function doesn't query the system for it.
///
/// The structure can be serialized using serde.
- pub fn new(path: &Path) -> std::io::Result<Self> {
+ pub async fn new(path: &Path) -> std::io::Result<Self> {
let m = symlink_metadata(path)?;
- let hash = if m.is_file() {
- Some(file_checksum(path)?)
- } else {
- None
- };
let target = if m.file_type().is_symlink() {
Some(read_link(path)?)
} else {
None
};
Ok(Self {
+ is_regular: m.is_file(),
+ filename: path.to_path_buf(),
path: path.to_string_lossy().into_owned(),
mode: m.st_mode(),
mtime: m.st_mtime(),
mtime_nsec: m.st_mtime_nsec(),
nlink: m.st_nlink(),
size: if m.is_dir() { None } else { Some(m.st_size()) },
- sha256: hash,
+ sha256: None,
target,
})
}
+
+ pub fn compute_checksum(&mut self) -> std::io::Result<()> {
+ if self.is_regular {
+ self.sha256 = Some(file_checksum(&self.filename)?);
+ }
+ Ok(())
+ }
}
fn file_checksum(path: &Path) -> std::io::Result<String> {
@@ -90,8 +102,8 @@ fn file_checksum(path: &Path) -> std::io::Result<String> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
- let mut buf = vec![0; BUF_SIZE];
loop {
+ let mut buf = vec![0; BUF_SIZE];
let n = reader.read(&mut buf)?;
if n == 0 {
break;