From 40fbe9ba35116ccff04251f6c52d1607f3480c6f Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 2 Apr 2022 15:48:23 +0300 Subject: feat: add data structures for accumulating time measurements Sponsored-by: author --- src/accumulated_time.rs | 79 ++++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/performance.rs | 118 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 src/accumulated_time.rs create mode 100644 src/performance.rs diff --git a/src/accumulated_time.rs b/src/accumulated_time.rs new file mode 100644 index 0000000..e633a10 --- /dev/null +++ b/src/accumulated_time.rs @@ -0,0 +1,79 @@ +//! Measure accumulated time for various operations. + +use std::collections::HashMap; +use std::hash::Hash; +use std::sync::Mutex; +use std::time::Instant; + +/// Accumulated times for different clocks. +/// +/// The caller defines a clock type, usually an enum. +/// `AccumulatedTime` accumulates time for each possible clock. +/// Conceptually, every type of clock exists. If a type of clock +/// doesn't ever get created, it measures at 0 accumulated time. +#[derive(Debug)] +pub struct AccumulatedTime { + accumulated: Mutex>, +} + +#[derive(Debug, Default)] +struct ClockTime { + nanos: u128, + started: Option, +} + +impl AccumulatedTime { + /// Create a new accumulated time collector. + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + Self { + accumulated: Mutex::new(HashMap::new()), + } + } + + /// Start a new clock of a given type to measure a span of time. + /// + /// The clock's measured time is added to the accumulator when the + /// clock is stopped. + pub fn start(&mut self, clock: T) { + let mut map = self.accumulated.lock().unwrap(); + let ct = map.entry(clock).or_insert_with(ClockTime::default); + assert!(ct.started.is_none()); + ct.started = Some(Instant::now()); + } + + /// Stop a running clock. + /// + /// Its run time is added to the accumulated time for that kind of clock. + pub fn stop(&mut self, clock: T) { + let mut map = self.accumulated.lock().unwrap(); + if let Some(mut ct) = map.get_mut(&clock) { + assert!(ct.started.is_some()); + if let Some(started) = ct.started.take() { + ct.nanos += started.elapsed().as_nanos(); + ct.started = None; + } + } + } + + /// Return the accumulated time for a type of clock, as whole seconds. + pub fn secs(&self, clock: T) -> u128 { + self.nanos(clock) / 1_000_000_000u128 + } + + /// Return the accumulated time for a type of clock, as nanoseconds. + /// + /// This includes the time spent in a currently running clock. + pub fn nanos(&self, clock: T) -> u128 { + let map = self.accumulated.lock().unwrap(); + if let Some(ct) = map.get(&clock) { + if let Some(started) = ct.started { + ct.nanos + started.elapsed().as_nanos() + } else { + ct.nanos + } + } else { + 0 + } + } +} diff --git a/src/lib.rs b/src/lib.rs index fdb318f..fd33a4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ #![deny(missing_docs)] +pub mod accumulated_time; pub mod backup_progress; pub mod backup_reason; pub mod backup_run; @@ -34,3 +35,4 @@ pub mod schema; pub mod server; pub mod store; pub mod workqueue; +pub mod performance; diff --git a/src/performance.rs b/src/performance.rs new file mode 100644 index 0000000..79fa9ab --- /dev/null +++ b/src/performance.rs @@ -0,0 +1,118 @@ +//! Performance measurements from an Obnam run. + +use crate::accumulated_time::AccumulatedTime; +use log::info; + +/// The kinds of clocks we have. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +pub enum Clock { + /// The complete runtime of the program. + RunTime, + + /// Time spent downloading previous backup generations. + GenerationDownload, + + /// Time spent uploading backup generations. + GenerationUpload, + + /// Time spent checking if a chunk exists already on server. + HasChunk, + + /// Time spent computing splitting files into chunks. + Chunking, + + /// Time spent scanning live data. + Scanning, +} + +/// Collected measurements from this Obnam run. +#[derive(Debug)] +pub struct Performance { + args: Vec, + time: AccumulatedTime, + live_files: u64, + files_backed_up: u64, + chunks_uploaded: u64, + chunks_reused: u64, +} + +impl Default for Performance { + fn default() -> Self { + Self { + args: std::env::args().collect(), + time: AccumulatedTime::::new(), + live_files: 0, + files_backed_up: 0, + chunks_reused: 0, + chunks_uploaded: 0, + } + } +} + +impl Performance { + /// Log all performance measurements to the log file. + pub fn log(&self) { + info!("Performance measurements for this Obnam run"); + for (i, arg) in self.args.iter().enumerate() { + info!("argv[{}]={:?}", i, arg); + } + info!("Live files found: {}", self.live_files); + info!("Files backed up: {}", self.files_backed_up); + info!("Chunks uploaded: {}", self.chunks_uploaded); + info!("Chunks reused: {}", self.chunks_reused); + info!( + "Scanning live data (seconds): {}", + self.time.secs(Clock::Scanning) + ); + info!( + "Chunking live data (seconds): {}", + self.time.secs(Clock::Chunking) + ); + info!( + "Checking for duplicate chunks (seconds): {}", + self.time.secs(Clock::HasChunk) + ); + info!( + "Downloading previous generation (seconds): {}", + self.time.secs(Clock::GenerationDownload) + ); + info!( + "Uploading new generation (seconds): {}", + self.time.secs(Clock::GenerationUpload) + ); + info!( + "Complete run time (seconds): {}", + self.time.secs(Clock::RunTime) + ); + } + + /// Start a specific clock. + pub fn start(&mut self, clock: Clock) { + self.time.start(clock) + } + + /// Stop a specific clock. + pub fn stop(&mut self, clock: Clock) { + self.time.stop(clock) + } + + /// Increment number of live files. + pub fn found_live_files(&mut self, n: u64) { + self.live_files += n; + } + + /// Increment number of files backed up this run. + pub fn back_up_file(&mut self) { + self.files_backed_up += 1; + } + + /// Increment number of reused chunks. + pub fn reuse_chunk(&mut self) { + self.chunks_reused += 1; + } + + /// Increment number of uploaded chunks. + pub fn upload_chunk(&mut self) { + self.chunks_uploaded += 1; + } +} -- cgit v1.2.1 From dd13fde5ea8bd61b6b5c7f024df6733301864456 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 2 Apr 2022 15:54:02 +0300 Subject: feat: collect and log some time metrics Log the complete run-time of the program, and the time spent downloading the previous generation, and uploading the new generation. Sponsored-by: author --- src/backup_run.rs | 7 +++++++ src/bin/obnam.rs | 11 ++++++++--- src/cmd/backup.rs | 23 ++++++++++++++++------- src/lib.rs | 2 +- src/performance.rs | 21 --------------------- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/backup_run.rs b/src/backup_run.rs index 21140ba..29e82fc 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -15,6 +15,7 @@ use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator}; use crate::generation::{ GenId, LocalGeneration, LocalGenerationError, NascentError, NascentGeneration, }; +use crate::performance::{Clock, Performance}; use crate::policy::BackupPolicy; use crate::schema::SchemaVersion; @@ -129,6 +130,7 @@ impl<'a> BackupRun<'a> { &mut self, genid: Option<&GenId>, oldname: &Path, + perf: &mut Performance, ) -> Result { match genid { None => { @@ -140,7 +142,9 @@ impl<'a> BackupRun<'a> { Ok(LocalGeneration::open(oldname)?) } Some(genid) => { + perf.start(Clock::GenerationDownload); let old = self.fetch_previous_generation(genid, oldname).await?; + perf.stop(Clock::GenerationDownload); let progress = BackupProgress::incremental(); progress.files_in_previous_generation(old.file_count()? as u64); @@ -176,6 +180,7 @@ impl<'a> BackupRun<'a> { old: &LocalGeneration, newpath: &Path, schema: SchemaVersion, + perf: &mut Performance, ) -> Result { let mut warnings: Vec = vec![]; let mut new_cachedir_tags = vec![]; @@ -204,7 +209,9 @@ impl<'a> BackupRun<'a> { count }; self.finish(); + perf.start(Clock::GenerationUpload); let gen_id = self.upload_nascent_generation(newpath).await?; + perf.stop(Clock::GenerationUpload); let gen_id = GenId::from_chunk_id(gen_id); Ok(RootsBackupOutcome { files_count, diff --git a/src/bin/obnam.rs b/src/bin/obnam.rs index 15c7d06..089a7a1 100644 --- a/src/bin/obnam.rs +++ b/src/bin/obnam.rs @@ -17,6 +17,7 @@ use obnam::cmd::restore::Restore; use obnam::cmd::show_config::ShowConfig; use obnam::cmd::show_gen::ShowGeneration; use obnam::config::ClientConfig; +use obnam::performance::{Clock, Performance}; use std::path::{Path, PathBuf}; use structopt::StructOpt; @@ -25,14 +26,18 @@ const ORG: &str = ""; const APPLICATION: &str = "obnam"; fn main() { - if let Err(err) = main_program() { + let mut perf = Performance::default(); + perf.start(Clock::RunTime); + if let Err(err) = main_program(&mut perf) { error!("{}", err); eprintln!("ERROR: {}", err); std::process::exit(1); } + perf.stop(Clock::RunTime); + perf.log(); } -fn main_program() -> anyhow::Result<()> { +fn main_program(perf: &mut Performance) -> anyhow::Result<()> { let opt = Opt::from_args(); let config = ClientConfig::read(&config_filename(&opt))?; setup_logging(&config.log)?; @@ -44,7 +49,7 @@ fn main_program() -> anyhow::Result<()> { match opt.cmd { Command::Init(x) => x.run(&config), Command::ListBackupVersions(x) => x.run(&config), - Command::Backup(x) => x.run(&config), + Command::Backup(x) => x.run(&config, perf), Command::Inspect(x) => x.run(&config), Command::Chunkify(x) => x.run(&config), Command::List(x) => x.run(&config), diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index 7e772b1..8a85703 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -7,6 +7,7 @@ use crate::config::ClientConfig; use crate::dbgen::{schema_version, FileId, DEFAULT_SCHEMA_MAJOR}; use crate::error::ObnamError; use crate::generation::GenId; +use crate::performance::{Clock, Performance}; use crate::schema::VersionComponent; use log::info; @@ -25,12 +26,16 @@ pub struct Backup { impl Backup { /// Run the command. - pub fn run(&self, config: &ClientConfig) -> Result<(), ObnamError> { + pub fn run(&self, config: &ClientConfig, perf: &mut Performance) -> Result<(), ObnamError> { let rt = Runtime::new()?; - rt.block_on(self.run_async(config)) + rt.block_on(self.run_async(config, perf)) } - async fn run_async(&self, config: &ClientConfig) -> Result<(), ObnamError> { + async fn run_async( + &self, + config: &ClientConfig, + perf: &mut Performance, + ) -> Result<(), ObnamError> { let runtime = SystemTime::now(); let major = self.backup_version.or(Some(DEFAULT_SCHEMA_MAJOR)).unwrap(); @@ -52,28 +57,32 @@ impl Backup { Err(_) => { info!("fresh backup without a previous generation"); let mut run = BackupRun::initial(config, &client)?; - let old = run.start(None, &oldtemp).await?; + let old = run.start(None, &oldtemp, perf).await?; ( false, - run.backup_roots(config, &old, &newtemp, schema).await?, + run.backup_roots(config, &old, &newtemp, schema, perf) + .await?, ) } Ok(old_id) => { info!("incremental backup based on {}", old_id); let mut run = BackupRun::incremental(config, &client)?; - let old = run.start(Some(&old_id), &oldtemp).await?; + let old = run.start(Some(&old_id), &oldtemp, perf).await?; ( true, - run.backup_roots(config, &old, &newtemp, schema).await?, + run.backup_roots(config, &old, &newtemp, schema, perf) + .await?, ) } }; + perf.start(Clock::GenerationUpload); let mut trust = trust; trust.append_backup(outcome.gen_id.as_chunk_id()); trust.finalize(current_timestamp()); let trust = trust.to_data_chunk()?; let trust_id = client.upload_chunk(trust).await?; + perf.stop(Clock::GenerationUpload); info!("uploaded new client-trust {}", trust_id); for w in outcome.warnings.iter() { diff --git a/src/lib.rs b/src/lib.rs index fd33a4e..a0a53c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,9 +30,9 @@ pub mod genmeta; pub mod index; pub mod indexedstore; pub mod passwords; +pub mod performance; pub mod policy; pub mod schema; pub mod server; pub mod store; pub mod workqueue; -pub mod performance; diff --git a/src/performance.rs b/src/performance.rs index 79fa9ab..29c2328 100644 --- a/src/performance.rs +++ b/src/performance.rs @@ -14,15 +14,6 @@ pub enum Clock { /// Time spent uploading backup generations. GenerationUpload, - - /// Time spent checking if a chunk exists already on server. - HasChunk, - - /// Time spent computing splitting files into chunks. - Chunking, - - /// Time spent scanning live data. - Scanning, } /// Collected measurements from this Obnam run. @@ -60,18 +51,6 @@ impl Performance { info!("Files backed up: {}", self.files_backed_up); info!("Chunks uploaded: {}", self.chunks_uploaded); info!("Chunks reused: {}", self.chunks_reused); - info!( - "Scanning live data (seconds): {}", - self.time.secs(Clock::Scanning) - ); - info!( - "Chunking live data (seconds): {}", - self.time.secs(Clock::Chunking) - ); - info!( - "Checking for duplicate chunks (seconds): {}", - self.time.secs(Clock::HasChunk) - ); info!( "Downloading previous generation (seconds): {}", self.time.secs(Clock::GenerationDownload) -- cgit v1.2.1