From abfeccd0d839eef9e90397dcff40f0bf918ed477 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 26 Dec 2021 18:49:25 +0200 Subject: feat: generate test data with junk contents Also, add "obnam-benchmark generate-junk" command, for testing this. Sponsored-by: author --- obnam-benchmark.md | 19 +++++++++++++++++++ src/bin/obnam-benchmark.rs | 29 +++++++++++++++++++++++++++++ src/junk.rs | 22 ++++++++++++++++++++++ src/lib.rs | 1 + src/suite.rs | 12 ++++++++++-- subplot/benchmark.rs | 17 +++++++++++++++++ subplot/benchmark.yaml | 5 +++++ 7 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 src/junk.rs diff --git a/obnam-benchmark.md b/obnam-benchmark.md index 709260d..2fc1452 100644 --- a/obnam-benchmark.md +++ b/obnam-benchmark.md @@ -71,6 +71,25 @@ file can describe synthetic benchmarks for different use cases. # Acceptance criteria +## Generate random data for a backup + +_Requirement: The benchmark tool can generate random test data._ + +We verify this by having the benchmark generate some test data, using +a subcommand for this purpose. If the data doesn't compress well, we +assume it's sufficiently random. We can safely assume that the same +code is used to generate test data for benchmarks, though of course +the scenario can't verify that. Instead, it can be verified via manual +code inspection. 
+ +~~~scenario +given an installed Rust program obnam-benchmark +when I run obnam-benchmark generate-junk 123000 junk.dat +when I run gzip junk.dat +then file junk.dat.gz is at least 100100 bytes long +~~~ + + ## Parses a specification file _Requirement: The benchmark tool can parse a specification file._ diff --git a/src/bin/obnam-benchmark.rs b/src/bin/obnam-benchmark.rs index 85ef3f9..1a26ae0 100644 --- a/src/bin/obnam-benchmark.rs +++ b/src/bin/obnam-benchmark.rs @@ -1,4 +1,5 @@ use log::{debug, error, info}; +use obnam_benchmark::junk::junk; use obnam_benchmark::result::Result; use obnam_benchmark::specification::Specification; use obnam_benchmark::suite::Suite; @@ -24,6 +25,7 @@ fn real_main() -> anyhow::Result<()> { debug!("parsed: {:#?}", opt); match opt.cmd { + Command::GenerateJunk(x) => x.run()?, Command::Run(x) => x.run()?, Command::Spec(x) => x.run()?, } @@ -44,6 +46,9 @@ enum Command { /// Dump the specification as JSON Spec(Spec), + + /// Generate some junk data in a file. 
+ GenerateJunk(GenerateJunk), } #[derive(Debug, StructOpt)] @@ -98,3 +103,27 @@ impl Spec { Ok(()) } } + +#[derive(Debug, StructOpt)] +struct GenerateJunk { + /// Number of bytes of junk to create + #[structopt()] + bytes: u64, + + /// Name of the output file + #[structopt(parse(from_os_str))] + filename: PathBuf, +} + +impl GenerateJunk { + fn run(&self) -> anyhow::Result<()> { + info!( + "generating {} bytes of junk into {}", + self.bytes, + self.filename.display() + ); + let mut output = File::create(&self.filename)?; + junk(&mut output, self.bytes)?; + Ok(()) + } +} diff --git a/src/junk.rs b/src/junk.rs new file mode 100644 index 0000000..77c676c --- /dev/null +++ b/src/junk.rs @@ -0,0 +1,22 @@ +use rand::RngCore; +use std::fs::File; +use std::io::Write; + +const BUFSIZ: usize = 4096; + +pub fn junk(f: &mut File, n: u64) -> Result<(), std::io::Error> { + let mut remain = n; + while remain >= BUFSIZ as u64 { + write_junk(f, BUFSIZ)?; + remain -= BUFSIZ as u64; + } + write_junk(f, remain as usize)?; + Ok(()) +} + +fn write_junk(f: &mut File, n: usize) -> Result<(), std::io::Error> { + let mut bytes = [0u8; BUFSIZ]; + rand::thread_rng().fill_bytes(&mut bytes); + f.write_all(&bytes[..n])?; + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 82e735b..e1e2b81 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ //! not analyze the data. The data can be stored for later analysis. 
pub mod daemon; +pub mod junk; pub mod obnam; pub mod result; pub mod server; diff --git a/src/suite.rs b/src/suite.rs index 0f49994..90ecf37 100644 --- a/src/suite.rs +++ b/src/suite.rs @@ -1,4 +1,5 @@ use crate::daemon::DaemonManager; +use crate::junk::junk; use crate::obnam::{Obnam, ObnamError}; use crate::result::{Measurement, OpMeasurements, Operation}; use crate::server::{ObnamServer, ObnamServerError}; @@ -151,12 +152,19 @@ impl Benchmark { fn create(&mut self, create: &Create) -> Result { info!("creating {} test data files", create.files); let root = self.obnam().root(); - debug!("creating {} files in {}", create.files, root.display()); + debug!( + "creating {} files of {} bytes in {}", + create.files, + create.file_size, + root.display() + ); for i in 0..create.files { let filename = root.join(format!("{}", i)); debug!("creating {}", filename.display()); - File::create(&filename).map_err(|err| SuiteError::CreateFile(filename, err))?; + let mut f = + File::create(&filename).map_err(|err| SuiteError::CreateFile(filename, err))?; + junk(&mut f, create.file_size)?; } Ok(OpMeasurements::new(self.name(), Operation::Create)) diff --git a/subplot/benchmark.rs b/subplot/benchmark.rs index 566ce30..082e02b 100644 --- a/subplot/benchmark.rs +++ b/subplot/benchmark.rs @@ -72,3 +72,20 @@ fn read_json_file(filename: &Path) -> anyhow::Result { let file = File::open(filename)?; Ok(serde_json::from_reader(&file)?) 
} + +#[step] +#[context(Datadir)] +#[context(SubplotContext)] +fn file_is_at_least_this_long(context: &ScenarioContext, filename: &str, number: u64) { + context.with( + |context: &Datadir| { + let filename = context.canonicalise_filename(&filename)?; + let meta = std::fs::metadata(&filename)?; + if meta.len() < number { + panic!("file is {} bytes, wanted at least {} bytes", meta.len(), number); + } + Ok(()) + }, + false, + )?; +} diff --git a/subplot/benchmark.yaml b/subplot/benchmark.yaml index 70ebb54..dc3a136 100644 --- a/subplot/benchmark.yaml +++ b/subplot/benchmark.yaml @@ -12,3 +12,8 @@ impl: rust: function: file_is_valid_json + +- then: file {filename} is at least {number:int} bytes long + impl: + rust: + function: file_is_at_least_this_long -- cgit v1.2.1