summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-04-23 08:01:02 +0300
committerLars Wirzenius <liw@liw.fi>2021-04-28 11:31:40 +0300
commitfe5b650897f6715d40a29e72d821caf812f9edaa (patch)
tree44757e6c3139948b1738195a15eabf4f5c7ca828 /src
parenta21cd46fc76ca7cb1c87a218c7c561d668a64125 (diff)
downloadsummain-rs-fe5b650897f6715d40a29e72d821caf812f9edaa.tar.gz
refactor: use async for concurrency
Normal tokio async tasks for collecting metadata, blocking tasks for computing checksums, so that all available CPU can be used for that.
Diffstat (limited to 'src')
-rw-r--r--src/bin/summain.rs52
-rw-r--r--src/lib.rs28
2 files changed, 64 insertions, 16 deletions
diff --git a/src/bin/summain.rs b/src/bin/summain.rs
index 4336ebd..5031006 100644
--- a/src/bin/summain.rs
+++ b/src/bin/summain.rs
@@ -1,17 +1,51 @@
use anyhow::Context;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
use structopt::StructOpt;
use summain::ManifestEntry;
-fn main() -> anyhow::Result<()> {
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
let mut opt = Opt::from_args();
opt.pathnames[..].sort();
- let v: Vec<anyhow::Result<ManifestEntry>> =
- opt.pathnames.iter().map(|p| manifest(&p)).collect();
- for m in v {
- let m = m?;
+
+ // Get metadata about all files, but don't compute checksum yet.
+ //
+ // This all runs in async worker threads, since it's theoretically
+ // I/O heavy and benefits from async task context switching. We
+ // create a task for each named file, since tasks are very cheap.
+ // We keep the JoinHandle for each task in a vector: there's not
+ // going to be so many handles that the vector becomes impossibly
+ // large: Linux only allows 128 KiB of command line arguments.
+
+ let mut handles = vec![];
+ for filename in opt.pathnames.iter().cloned() {
+ handles.push(tokio::spawn(async move { manifest(filename) }));
+ }
+
+ // Compute checksums for regular files.
+ //
+ // This runs in blocking threads, since it's CPU heavy. We create
+ // another vector of JoinHandles. Again, it won't become too large.
+
+ let mut sumhandles = vec![];
+ for h in handles {
+ let mut m: ManifestEntry = h.await?.await?;
+ let h = tokio::task::spawn_blocking(move || match m.compute_checksum() {
+ Err(e) => Err(e),
+ Ok(_) => Ok(m),
+ });
+ sumhandles.push(h)
+ }
+
+ // Wait for checksums to be available and print manifest.
+ //
+ // Note how this iterates over the results in the right order.
+
+ for h in sumhandles {
+ let m: ManifestEntry = h.await??;
print!("{}", serde_yaml::to_string(&m)?);
}
+
Ok(())
}
@@ -21,6 +55,8 @@ struct Opt {
pathnames: Vec<PathBuf>,
}
-fn manifest(path: &Path) -> anyhow::Result<ManifestEntry> {
- ManifestEntry::new(path).with_context(|| format!("{}", path.display()))
+async fn manifest(path: PathBuf) -> anyhow::Result<ManifestEntry> {
+ ManifestEntry::new(&path)
+ .await
+ .with_context(|| format!("{}", path.display()))
}
diff --git a/src/lib.rs b/src/lib.rs
index 3c90156..7075477 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -42,6 +42,14 @@ const BUF_SIZE: usize = 1024 * 1024;
/// An entry in a file manifest.
#[derive(Serialize, Debug)]
pub struct ManifestEntry {
+ #[serde(skip)]
+ is_regular: bool,
+
+ // Store the original name in a hidden field, for compute_checksum.
+ #[serde(skip)]
+ filename: PathBuf,
+
+ // We store pathname as a string so that we can handle non-UTF8 names.
path: String,
#[serde(with = "mode")]
mode: u32,
@@ -60,29 +68,33 @@ impl ManifestEntry {
/// caller. This function doesn't query the system for it.
///
/// The structure can be serialized using serde.
- pub fn new(path: &Path) -> std::io::Result<Self> {
+ pub async fn new(path: &Path) -> std::io::Result<Self> {
let m = symlink_metadata(path)?;
- let hash = if m.is_file() {
- Some(file_checksum(path)?)
- } else {
- None
- };
let target = if m.file_type().is_symlink() {
Some(read_link(path)?)
} else {
None
};
Ok(Self {
+ is_regular: m.is_file(),
+ filename: path.to_path_buf(),
path: path.to_string_lossy().into_owned(),
mode: m.st_mode(),
mtime: m.st_mtime(),
mtime_nsec: m.st_mtime_nsec(),
nlink: m.st_nlink(),
size: if m.is_dir() { None } else { Some(m.st_size()) },
- sha256: hash,
+ sha256: None,
target,
})
}
+
+ pub fn compute_checksum(&mut self) -> std::io::Result<()> {
+ if self.is_regular {
+ self.sha256 = Some(file_checksum(&self.filename)?);
+ }
+ Ok(())
+ }
}
fn file_checksum(path: &Path) -> std::io::Result<String> {
@@ -90,8 +102,8 @@ fn file_checksum(path: &Path) -> std::io::Result<String> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
- let mut buf = vec![0; BUF_SIZE];
loop {
+ let mut buf = vec![0; BUF_SIZE];
let n = reader.read(&mut buf)?;
if n == 0 {
break;