feat: generate test data with junk contents

Also, add "obnam-benchmark generate-junk" command, for testing this.

Sponsored-by: author
author: Lars Wirzenius <liw@liw.fi> 2021-12-26 18:49:25 +0200
committer: Lars Wirzenius <liw@liw.fi> 2021-12-26 19:21:03 +0200
commit: abfeccd0d839eef9e90397dcff40f0bf918ed477 (patch)
tree: fefd20b8aa3da4d7771a46121fc03d8fb1c5dad0
parent: 9f170606eb4fc24086274b446bb77cc1195280f0 (diff)
download: obnam-benchmark-abfeccd0d839eef9e90397dcff40f0bf918ed477.tar.gz
7 files changed, 103 insertions, 2 deletions
diff --git a/obnam-benchmark.md b/obnam-benchmark.md
index 709260d..2fc1452 100644
--- a/obnam-benchmark.md
+++ b/obnam-benchmark.md
@@ -71,6 +71,25 @@ file can describe synthetic benchmarks for different use cases.
 
 # Acceptance criteria
 
+## Generate random data for a backup
+
+_Requirement: The benchmark tool can generate random test data._
+
+We verify this by having the benchmark generate some test data, using
+a subcommand for this purpose. If the data doesn't compress well, we
+assume it's sufficiently random. We also assume that the same code is
+used to generate test data for actual benchmarks; the scenario can't
+verify that, so it must instead be confirmed by manual code
+inspection.
+
+~~~scenario
+given an installed Rust program obnam-benchmark
+when I run obnam-benchmark generate-junk 123000 junk.dat
+when I run gzip junk.dat
+then file junk.dat.gz is at least 100100 bytes long
+~~~
+
+
 ## Parses a specification file
 
 _Requirement: The benchmark tool can parse a specification file._
diff --git a/src/bin/obnam-benchmark.rs b/src/bin/obnam-benchmark.rs
index 85ef3f9..1a26ae0 100644
--- a/src/bin/obnam-benchmark.rs
+++ b/src/bin/obnam-benchmark.rs
@@ -1,4 +1,5 @@
 use log::{debug, error, info};
+use obnam_benchmark::junk::junk;
 use obnam_benchmark::result::Result;
 use obnam_benchmark::specification::Specification;
 use obnam_benchmark::suite::Suite;
@@ -24,6 +25,7 @@ fn real_main() -> anyhow::Result<()> {
     debug!("parsed: {:#?}", opt);
 
     match opt.cmd {
+        Command::GenerateJunk(x) => x.run()?,
         Command::Run(x) => x.run()?,
         Command::Spec(x) => x.run()?,
     }
@@ -44,6 +46,9 @@ enum Command {
 
     /// Dump the specification as JSON
     Spec(Spec),
+
+    /// Generate some junk data in a file.
+    GenerateJunk(GenerateJunk),
 }
 
 #[derive(Debug, StructOpt)]
@@ -98,3 +103,27 @@ impl Spec {
         Ok(())
     }
 }
+
+#[derive(Debug, StructOpt)]
+struct GenerateJunk {
+    /// Number of bytes of junk to create
+    #[structopt()]
+    bytes: u64,
+
+    /// Name of the output file
+    #[structopt(parse(from_os_str))]
+    filename: PathBuf,
+}
+
+impl GenerateJunk {
+    fn run(&self) -> anyhow::Result<()> {
+        info!(
+            "generating {} bytes of junk into {}",
+            self.bytes,
+            self.filename.display()
+        );
+        let mut output = File::create(&self.filename)?;
+        junk(&mut output, self.bytes)?;
+        Ok(())
+    }
+}
diff --git a/src/junk.rs b/src/junk.rs
new file mode 100644
index 0000000..77c676c
--- /dev/null
+++ b/src/junk.rs
@@ -0,0 +1,22 @@
+use rand::RngCore;
+use std::fs::File;
+use std::io::Write;
+
+const BUFSIZ: usize = 4096;
+
+pub fn junk(f: &mut File, n: u64) -> Result<(), std::io::Error> {
+    let mut remain = n;
+    while remain >= BUFSIZ as u64 {
+        write_junk(f, BUFSIZ)?;
+        remain -= BUFSIZ as u64;
+    }
+    write_junk(f, remain as usize)?;
+    Ok(())
+}
+
+fn write_junk(f: &mut File, n: usize) -> Result<(), std::io::Error> {
+    let mut bytes = [0u8; BUFSIZ];
+    rand::thread_rng().fill_bytes(&mut bytes);
+    f.write_all(&bytes[..n])?;
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index 82e735b..e1e2b81 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,6 +29,7 @@
 //! not analyze the data. The data can be stored for later analysis.
 
 pub mod daemon;
+pub mod junk;
 pub mod obnam;
 pub mod result;
 pub mod server;
diff --git a/src/suite.rs b/src/suite.rs
index 0f49994..90ecf37 100644
--- a/src/suite.rs
+++ b/src/suite.rs
@@ -1,4 +1,5 @@
 use crate::daemon::DaemonManager;
+use crate::junk::junk;
 use crate::obnam::{Obnam, ObnamError};
 use crate::result::{Measurement, OpMeasurements, Operation};
 use crate::server::{ObnamServer, ObnamServerError};
@@ -151,12 +152,19 @@ impl Benchmark {
     fn create(&mut self, create: &Create) -> Result<OpMeasurements, SuiteError> {
         info!("creating {} test data files", create.files);
         let root = self.obnam().root();
-        debug!("creating {} files in {}", create.files, root.display());
+        debug!(
+            "creating {} files of {} bytes in {}",
+            create.files,
+            create.file_size,
+            root.display()
+        );
 
         for i in 0..create.files {
             let filename = root.join(format!("{}", i));
             debug!("creating {}", filename.display());
-            File::create(&filename).map_err(|err| SuiteError::CreateFile(filename, err))?;
+            let mut f =
+                File::create(&filename).map_err(|err| SuiteError::CreateFile(filename, err))?;
+            junk(&mut f, create.file_size)?;
         }
 
         Ok(OpMeasurements::new(self.name(), Operation::Create))
diff --git a/subplot/benchmark.rs b/subplot/benchmark.rs
index 566ce30..082e02b 100644
--- a/subplot/benchmark.rs
+++ b/subplot/benchmark.rs
@@ -72,3 +72,20 @@ fn read_json_file(filename: &Path) -> anyhow::Result<Value> {
     let file = File::open(filename)?;
     Ok(serde_json::from_reader(&file)?)
 }
+
+#[step]
+#[context(Datadir)]
+#[context(SubplotContext)]
+fn file_is_at_least_this_long(context: &ScenarioContext, filename: &str, number: u64) {
+    context.with(
+        |context: &Datadir| {
+            let filename = context.canonicalise_filename(&filename)?;
+            let meta = std::fs::metadata(&filename)?;
+            if meta.len() < number {
+                panic!("file is {} bytes, wanted at least {} bytes", meta.len(), number);
+            }
+            Ok(())
+        },
+        false,
+    )?;
+}
diff --git a/subplot/benchmark.yaml b/subplot/benchmark.yaml
index 70ebb54..dc3a136 100644
--- a/subplot/benchmark.yaml
+++ b/subplot/benchmark.yaml
@@ -12,3 +12,8 @@
   impl:
     rust:
       function: file_is_valid_json
+
+- then: file {filename} is at least {number:int} bytes long
+  impl:
+    rust:
+      function: file_is_at_least_this_long
author	Lars Wirzenius <liw@liw.fi>	2021-12-26 18:49:25 +0200
committer	Lars Wirzenius <liw@liw.fi>	2021-12-26 19:21:03 +0200
commit	abfeccd0d839eef9e90397dcff40f0bf918ed477 (patch)
tree	fefd20b8aa3da4d7771a46121fc03d8fb1c5dad0
parent	9f170606eb4fc24086274b446bb77cc1195280f0 (diff)
download	obnam-benchmark-abfeccd0d839eef9e90397dcff40f0bf918ed477.tar.gz