summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2020-11-27 16:57:21 +0200
committer: Lars Wirzenius <liw@liw.fi> 2020-11-28 10:37:40 +0200
commit: aa6709b2f657a964c2af04f6370e7eb7c117bf9e (patch)
tree: 9e0f5ba56f45eb24d286ac38a67b27ed0899f9d6
parent: b5b5884097219d77aa0b4cd6ad4d3a9c1407f5a6 (diff)
download: summain-rs-aa6709b2f657a964c2af04f6370e7eb7c117bf9e.tar.gz
feat: implement Summain in Rust
-rw-r--r-- .gitignore 5
-rw-r--r-- Cargo.toml 5
-rwxr-xr-x check 35
-rw-r--r-- src/lib.rs 1
-rw-r--r-- src/main.rs 70
-rw-r--r-- subplot/runcmd.py 252
-rw-r--r-- subplot/runcmd.yaml 83
-rw-r--r-- subplot/summain.py 36
-rw-r--r-- subplot/summain.yaml 17
-rw-r--r-- summain.md 187
10 files changed, 689 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index ea8c4bf..92aa39b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,6 @@
/target
+Cargo.lock
+summain.html
+summain.pdf
+test.log
+test.py
diff --git a/Cargo.toml b/Cargo.toml
index 61ed129..b570500 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,3 +7,8 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+anyhow = "1"
+serde = { version = "1", features = ["derive"] }
+serde_yaml = "0.8"
+structopt = "0.3"
+unix_mode = "0.1"
diff --git a/check b/check
new file mode 100755
index 0000000..0445bf5
--- /dev/null
+++ b/check
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Run automated tests for the project.
+
+set -euo pipefail
+
+quiet=-q
+hideok=chronic
+if [ "$#" -gt 0 ]
+then
+ case "$1" in
+ verbose | -v | --verbose)
+ quiet=
+ hideok=
+ ;;
+ esac
+fi
+
+got_cargo_cmd()
+{
+ cargo --list | grep " $1 " > /dev/null
+}
+
+cargo build --all-targets $quiet
+got_cargo_cmd clippy && cargo clippy $quiet
+got_cargo_cmd fmt && cargo fmt -- --check
+
+sp-docgen summain.md -o summain.html
+sp-docgen summain.md -o summain.pdf
+
+sp-codegen summain.md -o test.py
+rm -f test.log
+$hideok python3 test.py --log test.log "$@"
+
+echo "Everything seems to be in order."
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1 @@
+
diff --git a/src/main.rs b/src/main.rs
index e7a11a9..fd23eb8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,69 @@
-fn main() {
- println!("Hello, world!");
+use anyhow::Context;
+use serde::Serialize;
+use std::fs::symlink_metadata;
+use std::fs::Metadata;
+use std::os::linux::fs::MetadataExt;
+use std::path::{Path, PathBuf};
+use structopt::StructOpt;
+
+fn main() -> anyhow::Result<()> {
+ let mut opt = Opt::from_args();
+ opt.pathnames[..].sort();
+ for pathname in opt.pathnames {
+ report(&pathname).with_context(|| format!("{}", pathname.display()))?
+ }
+ Ok(())
+}
+
+#[derive(StructOpt, Debug)]
+struct Opt {
+ #[structopt(parse(from_os_str))]
+ pathnames: Vec<PathBuf>,
+}
+
+#[derive(Serialize, Debug)]
+struct Entry {
+ path: PathBuf,
+ atime: i64,
+ atime_nsec: i64,
+ #[serde(with = "mode")]
+ mode: u32,
+ mtime: i64,
+ mtime_nsec: i64,
+ nlink: u64,
+ size: Option<u64>,
+}
+
+impl Entry {
+ fn new(path: &Path, m: Metadata) -> Self {
+ Self {
+ path: path.to_path_buf(),
+ atime: m.st_atime(),
+ atime_nsec: m.st_atime_nsec(),
+ mode: m.st_mode(),
+ mtime: m.st_mtime(),
+ mtime_nsec: m.st_mtime_nsec(),
+ nlink: m.st_nlink(),
+ size: if m.is_dir() { None } else { Some(m.st_size()) },
+ }
+ }
+}
+
+fn report(pathname: &Path) -> anyhow::Result<()> {
+ let m = symlink_metadata(pathname)?;
+ let e = Entry::new(pathname, m);
+ println!("{}", serde_yaml::to_string(&e)?);
+ Ok(())
+}
+
+mod mode {
+ use serde::{self, Serializer};
+
+ pub fn serialize<S>(mode: &u32, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ let s = unix_mode::to_string(*mode);
+ serializer.serialize_str(&s)
+ }
}
diff --git a/subplot/runcmd.py b/subplot/runcmd.py
new file mode 100644
index 0000000..a2564c6
--- /dev/null
+++ b/subplot/runcmd.py
@@ -0,0 +1,252 @@
+import logging
+import os
+import re
+import shlex
+import subprocess
+
+
+#
+# Helper functions.
+#
+
+# Get exit code or other stored data about the latest command run by
+# runcmd_run.
+
+
+def _runcmd_get(ctx, name):
+ ns = ctx.declare("_runcmd")
+ return ns[name]
+
+
+def runcmd_get_exit_code(ctx):
+ return _runcmd_get(ctx, "exit")
+
+
+def runcmd_get_stdout(ctx):
+ return _runcmd_get(ctx, "stdout")
+
+
+def runcmd_get_stdout_raw(ctx):
+ return _runcmd_get(ctx, "stdout.raw")
+
+
+def runcmd_get_stderr(ctx):
+ return _runcmd_get(ctx, "stderr")
+
+
+def runcmd_get_stderr_raw(ctx):
+ return _runcmd_get(ctx, "stderr.raw")
+
+
+def runcmd_get_argv(ctx):
+ return _runcmd_get(ctx, "argv")
+
+
+# Run a command, given an argv and other arguments for subprocess.Popen.
+#
+# This is meant to be a helper function, not bound directly to a step. The
+# stdout, stderr, and exit code are stored in the "_runcmd" namespace in the
+# ctx context.
+def runcmd_run(ctx, argv, **kwargs):
+ ns = ctx.declare("_runcmd")
+
+ # The Subplot Python template empties os.environ at startup, modulo a small
+ # number of variables with carefully chosen values. Here, we don't need to
+ # care about what those variables are, but we do need to not overwrite
+ # them, so we just add anything in the env keyword argument, if any, to
+ # os.environ.
+ env = dict(os.environ)
+ for key, arg in kwargs.pop("env", {}).items():
+ env[key] = arg
+
+ pp = ns.get("path-prefix")
+ if pp:
+ env["PATH"] = pp + ":" + env["PATH"]
+
+ logging.debug(f"runcmd_run")
+ logging.debug(f" argv: {argv}")
+ logging.debug(f" env: {env}")
+ p = subprocess.Popen(
+ argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, **kwargs
+ )
+ stdout, stderr = p.communicate("")
+ ns["argv"] = argv
+ ns["stdout.raw"] = stdout
+ ns["stderr.raw"] = stderr
+ ns["stdout"] = stdout.decode("utf-8")
+ ns["stderr"] = stderr.decode("utf-8")
+ ns["exit"] = p.returncode
+ logging.debug(f" ctx: {ctx}")
+ logging.debug(f" ns: {ns}")
+
+
+# Step: prepend srcdir to PATH whenever runcmd runs a command.
+def runcmd_helper_srcdir_path(ctx):
+ srcdir = globals()["srcdir"]
+ runcmd_prepend_to_path(ctx, srcdir)
+
+
+# Step: This creates a helper script.
+def runcmd_helper_script(ctx, filename=None):
+ get_file = globals()["get_file"]
+ with open(filename, "wb") as f:
+ f.write(get_file(filename))
+
+
+#
+# Step functions for running commands.
+#
+
+
+def runcmd_prepend_to_path(ctx, dirname=None):
+ ns = ctx.declare("_runcmd")
+ pp = ns.get("path-prefix", "")
+ if pp:
+ pp = f"{pp}:{dirname}"
+ else:
+ pp = dirname
+ ns["path-prefix"] = pp
+
+
+def runcmd_step(ctx, argv0=None, args=None):
+ runcmd_try_to_run(ctx, argv0=argv0, args=args)
+ runcmd_exit_code_is_zero(ctx)
+
+
+def runcmd_try_to_run(ctx, argv0=None, args=None):
+ argv = [shlex.quote(argv0)] + shlex.split(args)
+ runcmd_run(ctx, argv)
+
+
+#
+# Step functions for examining exit codes.
+#
+
+
+def runcmd_exit_code_is_zero(ctx):
+ runcmd_exit_code_is(ctx, exit=0)
+
+
+def runcmd_exit_code_is(ctx, exit=None):
+ assert_eq = globals()["assert_eq"]
+ assert_eq(runcmd_get_exit_code(ctx), int(exit))
+
+
+def runcmd_exit_code_is_nonzero(ctx):
+ runcmd_exit_code_is_not(ctx, exit=0)
+
+
+def runcmd_exit_code_is_not(ctx, exit=None):
+ assert_ne = globals()["assert_ne"]
+ assert_ne(runcmd_get_exit_code(ctx), int(exit))
+
+
+#
+# Step functions and helpers for examining output in various ways.
+#
+
+
+def runcmd_stdout_is(ctx, text=None):
+ _runcmd_output_is(runcmd_get_stdout(ctx), text)
+
+
+def runcmd_stdout_isnt(ctx, text=None):
+ _runcmd_output_isnt(runcmd_get_stdout(ctx), text)
+
+
+def runcmd_stderr_is(ctx, text=None):
+ _runcmd_output_is(runcmd_get_stderr(ctx), text)
+
+
+def runcmd_stderr_isnt(ctx, text=None):
+ _runcmd_output_isnt(runcmd_get_stderr(ctx), text)
+
+
+def _runcmd_output_is(actual, wanted):
+ assert_eq = globals()["assert_eq"]
+ wanted = bytes(wanted, "utf8").decode("unicode_escape")
+ logging.debug("_runcmd_output_is:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" wanted: {wanted!r}")
+ assert_eq(actual, wanted)
+
+
+def _runcmd_output_isnt(actual, wanted):
+ assert_ne = globals()["assert_ne"]
+ wanted = bytes(wanted, "utf8").decode("unicode_escape")
+ logging.debug("_runcmd_output_isnt:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" wanted: {wanted!r}")
+ assert_ne(actual, wanted)
+
+
+def runcmd_stdout_contains(ctx, text=None):
+ _runcmd_output_contains(runcmd_get_stdout(ctx), text)
+
+
+def runcmd_stdout_doesnt_contain(ctx, text=None):
+ _runcmd_output_doesnt_contain(runcmd_get_stdout(ctx), text)
+
+
+def runcmd_stderr_contains(ctx, text=None):
+ _runcmd_output_contains(runcmd_get_stderr(ctx), text)
+
+
+def runcmd_stderr_doesnt_contain(ctx, text=None):
+ _runcmd_output_doesnt_contain(runcmd_get_stderr(ctx), text)
+
+
+def _runcmd_output_contains(actual, wanted):
+ assert_eq = globals()["assert_eq"]
+ wanted = bytes(wanted, "utf8").decode("unicode_escape")
+ logging.debug("_runcmd_output_contains:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" wanted: {wanted!r}")
+ assert_eq(wanted in actual, True)
+
+
+def _runcmd_output_doesnt_contain(actual, wanted):
+ assert_ne = globals()["assert_ne"]
+ wanted = bytes(wanted, "utf8").decode("unicode_escape")
+ logging.debug("_runcmd_output_doesnt_contain:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" wanted: {wanted!r}")
+ assert_ne(wanted in actual, True)
+
+
+def runcmd_stdout_matches_regex(ctx, regex=None):
+ _runcmd_output_matches_regex(runcmd_get_stdout(ctx), regex)
+
+
+def runcmd_stdout_doesnt_match_regex(ctx, regex=None):
+ _runcmd_output_doesnt_match_regex(runcmd_get_stdout(ctx), regex)
+
+
+def runcmd_stderr_matches_regex(ctx, regex=None):
+ _runcmd_output_matches_regex(runcmd_get_stderr(ctx), regex)
+
+
+def runcmd_stderr_doesnt_match_regex(ctx, regex=None):
+ _runcmd_output_doesnt_match_regex(runcmd_get_stderr(ctx), regex)
+
+
+def _runcmd_output_matches_regex(actual, regex):
+ assert_ne = globals()["assert_ne"]
+ r = re.compile(regex)
+ m = r.search(actual)
+ logging.debug("_runcmd_output_matches_regex:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" regex: {regex!r}")
+ logging.debug(f" match: {m}")
+ assert_ne(m, None)
+
+
+def _runcmd_output_doesnt_match_regex(actual, regex):
+ assert_eq = globals()["assert_eq"]
+ r = re.compile(regex)
+ m = r.search(actual)
+ logging.debug("_runcmd_output_doesnt_match_regex:")
+ logging.debug(f" actual: {actual!r}")
+ logging.debug(f" regex: {regex!r}")
+ logging.debug(f" match: {m}")
+ assert_eq(m, None)
diff --git a/subplot/runcmd.yaml b/subplot/runcmd.yaml
new file mode 100644
index 0000000..48dde90
--- /dev/null
+++ b/subplot/runcmd.yaml
@@ -0,0 +1,83 @@
+# Steps to run commands.
+
+- given: helper script {filename} for runcmd
+ function: runcmd_helper_script
+
+- given: srcdir is in the PATH
+ function: runcmd_helper_srcdir_path
+
+- when: I run (?P<argv0>\S+)(?P<args>.*)
+ regex: true
+ function: runcmd_step
+
+- when: I try to run (?P<argv0>\S+)(?P<args>.*)
+ regex: true
+ function: runcmd_try_to_run
+
+# Steps to examine exit code of latest command.
+
+- then: exit code is {exit}
+ function: runcmd_exit_code_is
+
+- then: exit code is not {exit}
+ function: runcmd_exit_code_is_not
+
+- then: command is successful
+ function: runcmd_exit_code_is_zero
+
+- then: command fails
+ function: runcmd_exit_code_is_nonzero
+
+# Steps to examine stdout/stderr for exact content.
+
+- then: stdout is exactly "(?P<text>.*)"
+ regex: true
+ function: runcmd_stdout_is
+
+- then: "stdout isn't exactly \"(?P<text>.*)\""
+ regex: true
+ function: runcmd_stdout_isnt
+
+- then: stderr is exactly "(?P<text>.*)"
+ regex: true
+ function: runcmd_stderr_is
+
+- then: "stderr isn't exactly \"(?P<text>.*)\""
+ regex: true
+ function: runcmd_stderr_isnt
+
+# Steps to examine stdout/stderr for sub-strings.
+
+- then: stdout contains "(?P<text>.*)"
+ regex: true
+ function: runcmd_stdout_contains
+
+- then: "stdout doesn't contain \"(?P<text>.*)\""
+ regex: true
+ function: runcmd_stdout_doesnt_contain
+
+- then: stderr contains "(?P<text>.*)"
+ regex: true
+ function: runcmd_stderr_contains
+
+- then: "stderr doesn't contain \"(?P<text>.*)\""
+ regex: true
+ function: runcmd_stderr_doesnt_contain
+
+# Steps to match stdout/stderr against regular expressions.
+
+- then: stdout matches regex (?P<regex>.*)
+ regex: true
+ function: runcmd_stdout_matches_regex
+
+- then: stdout doesn't match regex (?P<regex>.*)
+ regex: true
+ function: runcmd_stdout_doesnt_match_regex
+
+- then: stderr matches regex (?P<regex>.*)
+ regex: true
+ function: runcmd_stderr_matches_regex
+
+- then: stderr doesn't match regex (?P<regex>.*)
+ regex: true
+ function: runcmd_stderr_doesnt_match_regex
diff --git a/subplot/summain.py b/subplot/summain.py
new file mode 100644
index 0000000..3ea6188
--- /dev/null
+++ b/subplot/summain.py
@@ -0,0 +1,36 @@
+import os
+
+
+def install_summain(ctx):
+ runcmd_prepend_to_path = globals()["runcmd_prepend_to_path"]
+ srcdir = globals()["srcdir"]
+ bindir = os.path.join(srcdir, "target", "debug")
+ runcmd_prepend_to_path(ctx, dirname=bindir)
+
+
+def create_directory(ctx, dirname=None):
+ os.mkdir(dirname)
+
+
+def create_file(ctx, filename=None):
+ open(filename, "w").close()
+
+
+def set_atime(ctx, filename=None, timestamp=None):
+ st = os.lstat(filename)
+ os.utime(filename, (int(timestamp), int(st.st_mtime)))
+
+
+def set_mtime(ctx, filename=None, timestamp=None):
+ st = os.lstat(filename)
+ os.utime(filename, (int(st.st_atime), int(timestamp)))
+
+
+def output_matches_file(ctx, filename=None):
+ runcmd_get_stdout = globals()["runcmd_get_stdout"]
+ get_file = globals()["get_file"]
+ assert_eq = globals()["assert_eq"]
+
+ actual = runcmd_get_stdout(ctx)
+ expected = get_file(filename).decode("UTF-8")
+ assert_eq(actual, expected)
diff --git a/subplot/summain.yaml b/subplot/summain.yaml
new file mode 100644
index 0000000..31bc28c
--- /dev/null
+++ b/subplot/summain.yaml
@@ -0,0 +1,17 @@
+- given: an installed summain
+ function: install_summain
+
+- given: directory {dirname}
+ function: create_directory
+
+- given: file {filename}
+ function: create_file
+
+- given: atime for {filename} is {timestamp}
+ function: set_atime
+
+- given: mtime for {filename} is {timestamp}
+ function: set_mtime
+
+- then: output matches file {filename}
+ function: output_matches_file
diff --git a/summain.md b/summain.md
new file mode 100644
index 0000000..7c2c5fa
--- /dev/null
+++ b/summain.md
@@ -0,0 +1,187 @@
+# Introduction
+
+A file manifest lists files, with their metadata.
+
+To verify a backup has been restored correctly, one can compare a
+manifest of the data before the backup and after it has been restored.
+If the manifests are identical, the data has been restored correctly.
+
+This requires a way to produce manifests that is deterministic: if run
+twice on the same input files, without the files having changed, the
+result should be identical. The Summain program does this.
+
+This version of Summain has been written in Rust for the [Obnam][]
+project.
+
+[Obnam]: https://obnam.org/
+
+## Why not mtree?
+
+[mtree]: http://cdn.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/mtree/README.html
+[NetBSD]: https://en.wikipedia.org/wiki/NetBSD
+
+[mtree][] is a tool included in [NetBSD][] Unix since version 1.2,
+released in 1996. It produces a manifest, and can check a manifest
+against the file system. It is, in principle, a tool that solves the
+same problem as Summain. Why not use an existing tool? Some reasons:
+
+* I'm an anti-social not-invented-here jerk.
+* It's an old C program, without tests in the source tree.
+* The file format is custom, and not nice for reading by humans.
+* It doesn't handle Unicode well.
+ - a filename of `ö` is encoded as `\M-C\M-6`
+ - but at least it can handle non-ASCII characters!
+* It doesn't handle file metadata that's Linux specific.
+ - extended attributes
+ - the ext4 immutable bit
+* It's single-threaded.
+
+In principle, there is no reason why mtree couldn't be extended to
+support everything I need for Obnam. In practice, since I'm working on
+this in my free time in order to have fun, I prefer to write a new
+tool in Rust.
+
+
+## Why not use the old Python version of Summain?
+
+I don't like Python anymore. The old tool would need updates to work
+with current Python, and I'd rather use Rust.
+
+
+# Usage
+
+Summain is given one or more files or directories on the command line,
+and it outputs to its standard output a manifest. If the command line
+arguments are the same, and the files haven't changed, the manifest is
+the same.
+
+The output is YAML. Each file gets its own YAML document, delimited
+by `---` and `...` as usual.
+
+Summain does not itself traverse directories. Instead, a tool like
+**find**(1) should be used. Summain will, however, sort its command
+line arguments, so that the output is the same regardless of the
+order in which they are given.
+
+# Acceptance criteria
+
+## Directory
+
+~~~scenario
+given an installed summain
+given directory empty
+and atime for empty is 123
+and mtime for empty is 456
+when I run chmod a=rx empty
+when I run summain empty
+then output matches file empty.yaml
+~~~
+
+```{#empty.yaml .file .numberLines}
+---
+path: empty
+atime: 123
+atime_nsec: 0
+mode: dr-xr-xr-x
+mtime: 456
+mtime_nsec: 0
+nlink: 2
+size: ~
+```
+
+## Writeable file
+
+
+~~~scenario
+given an installed summain
+given file foo
+and atime for foo is 11
+and mtime for foo is 22
+when I run chmod a=rw foo
+when I run summain foo
+then output matches file foo.yaml
+~~~
+
+```{#foo.yaml .file .numberLines}
+---
+path: foo
+atime: 11
+atime_nsec: 0
+mode: "-rw-rw-rw-"
+mtime: 22
+mtime_nsec: 0
+nlink: 1
+size: 0
+```
+
+## Read-only file
+
+~~~scenario
+given an installed summain
+given file foo
+and atime for foo is 33
+and mtime for foo is 44
+when I run chmod a=r foo
+when I run summain foo
+then output matches file readonly.yaml
+~~~
+
+```{#readonly.yaml .file .numberLines}
+---
+path: foo
+atime: 33
+atime_nsec: 0
+mode: "-r--r--r--"
+mtime: 44
+mtime_nsec: 0
+nlink: 1
+size: 0
+```
+
+## Two files sorted
+
+~~~scenario
+given an installed summain
+given file aaa
+and atime for aaa is 33
+and mtime for aaa is 44
+given file bbb
+and atime for bbb is 33
+and mtime for bbb is 44
+when I run chmod a=r aaa bbb
+when I run summain bbb aaa
+then output matches file aaabbb.yaml
+~~~
+
+```{#aaabbb.yaml .file .numberLines}
+---
+path: aaa
+atime: 33
+atime_nsec: 0
+mode: "-r--r--r--"
+mtime: 44
+mtime_nsec: 0
+nlink: 1
+size: 0
+---
+path: bbb
+atime: 33
+atime_nsec: 0
+mode: "-r--r--r--"
+mtime: 44
+mtime_nsec: 0
+nlink: 1
+size: 0
+```
+
+---
+title: "Summain&mdash;deterministic file manifests"
+author: Lars Wirzenius
+template: python
+bindings:
+ - subplot/summain.yaml
+ - subplot/runcmd.yaml
+functions:
+ - subplot/summain.py
+ - subplot/runcmd.py
+...