summary refs log tree commit diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2022-04-09 09:40:59 +0300
committer: Lars Wirzenius <liw@liw.fi> 2022-04-16 09:06:05 +0300
commit: 82ff782fe85c84c10f1f18c9bd5c2b017bc2f240 (patch)
tree: dee4767299a179067b9b54d557e96f95af88801c
parent: d9b72ffa5485f3c253da22f09ff0a7090de7aa37 (diff)
download: obnam2-82ff782fe85c84c10f1f18c9bd5c2b017bc2f240.tar.gz
feat! change how chunk labels are serialized
Serialized labels now start with a type prefix: a character that says what type of label it is. This isn't strictly required: we _can_ just decide to always use a single type of checksum for all chunks in one backup, for one client, or in the whole repository. However, if it's ever possible to have more than one type, it helps debugging if every checksum, when serialized, is explicit about its type.

Change things to use the new serialize method instead of the Display trait for Label. We're primarily serializing labels so they can be stored in a database and used in URLs, and only secondarily showing them to users.

Sponsored-by: author
-rw-r--r-- obnam.md 20
-rw-r--r-- src/chunkmeta.rs 6
-rw-r--r-- src/client.rs 2
-rw-r--r-- src/index.rs 6
-rw-r--r-- src/label.rs 59
5 files changed, 65 insertions, 28 deletions
diff --git a/obnam.md b/obnam.md
index a02aacb..381802c 100644
--- a/obnam.md
+++ b/obnam.md
@@ -1092,7 +1092,7 @@ storage of backed up data.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"}
+when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1105,17 +1105,17 @@ We must be able to retrieve it.
when I GET /v1/chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"label":"abc"}
+and chunk-meta is {"label":"0abc"}
and the body matches file data.dat
~~~
We must also be able to find it based on metadata.
~~~scenario
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"label":"abc"}}
+and the JSON body matches {"<ID>":{"label":"0abc"}}
~~~
Finally, we must be able to delete it. After that, we must not be able
@@ -1128,7 +1128,7 @@ then HTTP status code is 200
when I GET /v1/chunks/<ID>
then HTTP status code is 404
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1151,7 +1151,7 @@ We must get an empty result if searching for chunks that don't exist.
~~~scenario
given a working Obnam system
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1178,7 +1178,7 @@ First, create a chunk.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"abc"}
+when I POST data.dat to /v1/chunks, with chunk-meta: {"label":"0abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1194,10 +1194,10 @@ given a running chunk server
Can we still find it by its metadata?
~~~scenario
-when I GET /v1/chunks?label=abc
+when I GET /v1/chunks?label=0abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"label":"abc"}}
+and the JSON body matches {"<ID>":{"label":"0abc"}}
~~~
Can we still retrieve it by its identifier?
@@ -1206,7 +1206,7 @@ Can we still retrieve it by its identifier?
when I GET /v1/chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"label":"abc"}
+and chunk-meta is {"label":"0abc"}
and the body matches file data.dat
~~~
diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs
index 1f591c6..fe7ef4c 100644
--- a/src/chunkmeta.rs
+++ b/src/chunkmeta.rs
@@ -39,7 +39,7 @@ impl ChunkMeta {
/// Data chunks are not for generations.
pub fn new(label: &Label) -> Self {
ChunkMeta {
- label: label.to_string(),
+ label: label.serialize(),
}
}
@@ -85,14 +85,14 @@ mod test {
fn new_creates_data_chunk() {
let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
- assert_eq!(meta.label(), &format!("{}", sum));
+ assert_eq!(meta.label(), sum.serialize());
}
#[test]
fn new_generation_creates_generation_chunk() {
let sum = Label::sha256(b"abcdef");
let meta = ChunkMeta::new(&sum);
- assert_eq!(meta.label(), &format!("{}", sum));
+ assert_eq!(meta.label(), sum.serialize());
}
#[test]
diff --git a/src/client.rs b/src/client.rs
index d8bf262..bed5f1e 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -196,7 +196,7 @@ impl BackupClient {
}
async fn find_client_trusts(&self) -> Result<Vec<ChunkId>, ClientError> {
- let label = format!("{}", Label::literal("client-trust"));
+ let label = Label::literal("client-trust").serialize();
let body = match self.get("", &[("label", &label)]).await {
Ok((_, body)) => body,
Err(err) => return Err(err),
diff --git a/src/index.rs b/src/index.rs
index 5310a44..52da2f2 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -93,7 +93,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta.clone()).unwrap();
assert_eq!(idx.get_meta(&id).unwrap(), meta);
- let ids = idx.find_by_label(&format!("{}", sum)).unwrap();
+ let ids = idx.find_by_label(&sum.serialize()).unwrap();
assert_eq!(ids, vec![id]);
}
@@ -117,7 +117,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta).unwrap();
idx.remove_meta(&id).unwrap();
- let ids: Vec<ChunkId> = idx.find_by_label(&format!("{}", sum)).unwrap();
+ let ids: Vec<ChunkId> = idx.find_by_label(&sum.serialize()).unwrap();
assert_eq!(ids, vec![]);
}
}
@@ -216,7 +216,7 @@ mod sql {
fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> {
let hash: String = row.get("label")?;
- let sha256 = Label::sha256_from_str_unchecked(&hash);
+ let sha256 = Label::deserialize(&hash).expect("deserialize checksum from database");
Ok(ChunkMeta::new(&sha256))
}
diff --git a/src/label.rs b/src/label.rs
index 7ee55d1..64be341 100644
--- a/src/label.rs
+++ b/src/label.rs
@@ -6,7 +6,9 @@
//! small number of carefully chosen algorithms are supported here.
use sha2::{Digest, Sha256};
-use std::fmt;
+
+const LITERAL: char = '0';
+const SHA256: char = '1';
/// A checksum of some data.
#[derive(Debug, Clone)]
@@ -32,18 +34,53 @@ impl Label {
Self::Sha256(format!("{:x}", hash))
}
- /// Create a `Checksum` from a known, previously computed hash.
- pub fn sha256_from_str_unchecked(hash: &str) -> Self {
- Self::Sha256(hash.to_string())
+ /// Serialize a label into a string representation.
+ pub fn serialize(&self) -> String {
+ match self {
+ Self::Literal(s) => format!("{}{}", LITERAL, s),
+ Self::Sha256(hash) => format!("{}{}", SHA256, hash),
+ }
}
-}
-impl fmt::Display for Label {
- /// Format a checksum for display.
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Self::Literal(s) => write!(f, "{}", s),
- Self::Sha256(hash) => write!(f, "{}", hash),
+ /// De-serialize a label from its string representation.
+ pub fn deserialize(s: &str) -> Result<Self, LabelError> {
+ if s.starts_with(LITERAL) {
+ Ok(Self::Literal(s[1..].to_string()))
+ } else if s.starts_with(SHA256) {
+ Ok(Self::Sha256(s[1..].to_string()))
+ } else {
+ Err(LabelError::UnknownType(s.to_string()))
}
}
}
+
+/// Possible errors from dealing with chunk labels.
+#[derive(Debug, thiserror::Error)]
+pub enum LabelError {
+ /// Serialized label didn't start with a known type prefix.
+ #[error("Unknown label: {0:?}")]
+ UnknownType(String),
+}
+
+#[cfg(test)]
+mod test {
+ use super::Label;
+
+ #[test]
+ fn roundtrip_literal() {
+ let label = Label::literal("dummy data");
+ let serialized = label.serialize();
+ let de = Label::deserialize(&serialized).unwrap();
+ let seri2 = de.serialize();
+ assert_eq!(serialized, seri2);
+ }
+
+ #[test]
+ fn roundtrip_sha256() {
+ let label = Label::sha256(b"dummy data");
+ let serialized = label.serialize();
+ let de = Label::deserialize(&serialized).unwrap();
+ let seri2 = de.serialize();
+ assert_eq!(serialized, seri2);
+ }
+}