summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2022-03-12 07:52:05 +0200
committer Lars Wirzenius <liw@liw.fi> 2022-03-13 08:55:48 +0200
commit 29e68de7973f2b294c50b7d33ef216a8f095b9f9 (patch)
tree 4103f4c460d0db422b999b4f19097cd489b2a6ca
parent 5e79c4f9b3e5789d1d40c2144ba6c3b6a868dae7 (diff)
download obnam2-29e68de7973f2b294c50b7d33ef216a8f095b9f9.tar.gz
feat! rename metadata field "sha256" to "label"
The field still contains a cleartext SHA256 of the cleartext chunk data, but the new name makes it clearer that it may contain other data. This is a breaking change: the server API won't work with an old client, and the new client won't work with an old server. Avoiding the breakage would require more effort than is warranted at this time, given the very small number of users of Obnam. Sorry.

Sponsored-by: author
-rw-r--r-- obnam.md 37
-rw-r--r-- src/bin/obnam-server.rs 4
-rw-r--r-- src/chunk.rs 4
-rw-r--r-- src/chunkmeta.rs 37
-rw-r--r-- src/client.rs 2
-rw-r--r-- src/index.rs 14
-rw-r--r-- src/indexedstore.rs 6
-rw-r--r-- subplot/server.py 4
-rw-r--r-- subplot/server.yaml 4
9 files changed, 59 insertions, 53 deletions
diff --git a/obnam.md b/obnam.md
index 50c293c..8b80eed 100644
--- a/obnam.md
+++ b/obnam.md
@@ -835,12 +835,13 @@ Chunks consist of arbitrary binary data, a small amount of metadata,
and an identifier chosen by the server. The chunk metadata is a JSON
object, consisting of the following fields:
-* `sha256` &mdash; the SHA256 checksum of the chunk contents as
+* `label` &mdash; the SHA256 checksum of the chunk contents as
determined by the client
- this MUST be set for every chunk, including generation chunks
- the server allows for searching based on this field
- note that the server doesn't verify this in any way, to pave way
- for future client-side encryption of the chunk data
+ for future client-side encryption of the chunk data, including the
+ label
* `generation` &mdash; set to `true` if the chunk represents a
generation
- may also be set to `false` or `null` or be missing entirely
@@ -865,7 +866,7 @@ The server has the following API for managing chunks:
server, return its randomly chosen identifier
* `GET /chunks/<ID>` &mdash; retrieve a chunk (and its metadata) from
the server, given a chunk identifier
-* `GET /chunks?sha256=xyzzy` &mdash; find chunks on the server whose
+* `GET /chunks?label=xyzzy` &mdash; find chunks on the server whose
metadata indicates their contents has a given SHA256 checksum
* `GET /chunks?generation=true` &mdash; find generation chunks
* `GET /chunks?data=True` &mdash; find chunks with file data
@@ -903,7 +904,7 @@ metadata are returned in a JSON object:
~~~json
{
"fe20734b-edb3-432f-83c3-d35fe15969dd": {
- "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
+ "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
"generation": null,
"ended": null
}
@@ -1036,7 +1037,7 @@ storage of backed up data.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
+when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1049,17 +1050,17 @@ We must be able to retrieve it.
when I GET /chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"sha256":"abc","generation":null,"ended":null}
+and chunk-meta is {"label":"abc","generation":null,"ended":null}
and the body matches file data.dat
~~~
We must also be able to find it based on metadata.
~~~scenario
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}}
+and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}}
~~~
Finally, we must be able to delete it. After that, we must not be able
@@ -1072,7 +1073,7 @@ then HTTP status code is 200
when I GET /chunks/<ID>
then HTTP status code is 404
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1095,7 +1096,7 @@ We must get an empty result if searching for chunks that don't exist.
~~~scenario
given a working Obnam system
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1122,7 +1123,7 @@ First, create a chunk.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
+when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1138,10 +1139,10 @@ given a running chunk server
Can we still find it by its metadata?
~~~scenario
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}}
+and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}}
~~~
Can we still retrieve it by its identifier?
@@ -1150,7 +1151,7 @@ Can we still retrieve it by its identifier?
when I GET /chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"sha256":"abc","generation":null,"ended":null}
+and chunk-meta is {"label":"abc","generation":null,"ended":null}
and the body matches file data.dat
~~~
@@ -1164,14 +1165,14 @@ server more chatty.
~~~scenario
given a working Obnam system
and a file data1.dat containing some random data
-when I POST data1.dat to /chunks, with chunk-meta: {"sha256":"qwerty"}
+when I POST data1.dat to /chunks, with chunk-meta: {"label":"qwerty"}
then the JSON body has a field chunk_id, henceforth ID
and chunk server's stderr doesn't contain "Obnam server starting up"
and chunk server's stderr doesn't contain "created chunk <ID>"
given a running chunk server with environment {"OBNAM_SERVER_LOG": "info"}
and a file data2.dat containing some random data
-when I POST data2.dat to /chunks, with chunk-meta: {"sha256":"xyz"}
+when I POST data2.dat to /chunks, with chunk-meta: {"label":"xyz"}
then the JSON body has a field chunk_id, henceforth ID
and chunk server's stderr contains "Obnam server starting up"
and chunk server's stderr contains "created chunk <ID>"
@@ -1274,8 +1275,8 @@ roots: [live]
given a working Obnam system
given a client config based on smoke.yaml
given a file cleartext.dat containing some random data
-when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"sha256":"fake"}'
-when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"sha256":"fake"}'
+when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"label":"fake"}'
+when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"label":"fake"}'
then files cleartext.dat and encrypted.dat are different
then files cleartext.dat and decrypted.dat are identical
~~~
diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs
index f06b7b5..0b80854 100644
--- a/src/bin/obnam-server.rs
+++ b/src/bin/obnam-server.rs
@@ -155,8 +155,8 @@ pub async fn search_chunks(
}
if key == "generation" && value == "true" {
store.find_generations().expect("SQL lookup failed")
- } else if key == "sha256" {
- store.find_by_sha256(value).expect("SQL lookup failed")
+ } else if key == "label" {
+ store.find_by_label(value).expect("SQL lookup failed")
} else {
error!("unknown search key {:?}", key);
return Ok(ChunkResult::BadRequest);
diff --git a/src/chunk.rs b/src/chunk.rs
index 15e3288..a37aa57 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -97,8 +97,8 @@ impl GenerationChunk {
let json: String =
serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?;
let bytes = json.as_bytes().to_vec();
- let sha = Checksum::sha256(&bytes);
- let meta = ChunkMeta::new_generation(&sha, ended);
+ let checksum = Checksum::sha256(&bytes);
+ let meta = ChunkMeta::new_generation(&checksum, ended);
Ok(DataChunk::new(bytes, meta))
}
}
diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs
index 06a187b..9a435fe 100644
--- a/src/chunkmeta.rs
+++ b/src/chunkmeta.rs
@@ -10,7 +10,8 @@ use std::str::FromStr;
/// We manage three bits of metadata about chunks, in addition to its
/// identifier:
///
-/// * for all chunks, a [SHA256][] checksum of the chunk content
+/// * for all chunks, a [SHA256][] checksum of the chunk content; we
+/// expose this to the server as the chunk "label"
///
/// * for generation chunks, an indication that it is a generation
/// chunk, and a timestamp for when making the generation snapshot
@@ -23,7 +24,7 @@ use std::str::FromStr;
///
/// ~~~json
/// {
-/// "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
+/// "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
/// "generation": true,
/// "ended": "2020-09-17T08:17:13+03:00"
/// }
@@ -40,7 +41,7 @@ use std::str::FromStr;
/// [SHA256]: https://en.wikipedia.org/wiki/SHA-2
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct ChunkMeta {
- sha256: String,
+ label: String,
// The remaining fields are Options so that JSON parsing doesn't
// insist on them being there in the textual representation.
generation: Option<bool>,
@@ -51,18 +52,18 @@ impl ChunkMeta {
/// Create a new data chunk.
///
/// Data chunks are not for generations.
- pub fn new(sha256: &Checksum) -> Self {
+ pub fn new(checksum: &Checksum) -> Self {
ChunkMeta {
- sha256: sha256.to_string(),
+ label: checksum.to_string(),
generation: None,
ended: None,
}
}
/// Create a new generation chunk.
- pub fn new_generation(sha256: &Checksum, ended: &str) -> Self {
+ pub fn new_generation(checksum: &Checksum, ended: &str) -> Self {
ChunkMeta {
- sha256: sha256.to_string(),
+ label: checksum.to_string(),
generation: Some(true),
ended: Some(ended.to_string()),
}
@@ -78,9 +79,13 @@ impl ChunkMeta {
self.ended.as_deref()
}
- /// SHA256 checksum of the content of the chunk.
- pub fn sha256(&self) -> &str {
- &self.sha256
+ /// The label of the content of the chunk.
+ ///
+ /// The caller should not interpret the label in any way. It
+ /// happens to be a SHA256 of the cleartext contents of the
+/// chunk for now, but that _will_ change in the future.
+ pub fn label(&self) -> &str {
+ &self.label
}
/// Serialize from a textual JSON representation.
@@ -118,7 +123,7 @@ mod test {
let meta = ChunkMeta::new(&sum);
assert!(!meta.is_generation());
assert_eq!(meta.ended(), None);
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
@@ -127,26 +132,26 @@ mod test {
let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00");
assert!(meta.is_generation());
assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00"));
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
fn data_chunk_from_json() {
- let meta: ChunkMeta = r#"{"sha256": "abcdef"}"#.parse().unwrap();
+ let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap();
assert!(!meta.is_generation());
assert_eq!(meta.ended(), None);
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
fn generation_chunk_from_json() {
let meta: ChunkMeta =
- r#"{"sha256": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"#
+ r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"#
.parse()
.unwrap();
assert!(meta.is_generation());
assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00"));
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
diff --git a/src/client.rs b/src/client.rs
index bcc31b4..b58f89c 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -130,7 +130,7 @@ impl BackupClient {
/// Does the server have a chunk?
pub async fn has_chunk(&self, meta: &ChunkMeta) -> Result<Option<ChunkId>, ClientError> {
- let body = match self.get("", &[("sha256", meta.sha256())]).await {
+ let body = match self.get("", &[("label", meta.label())]).await {
Ok((_, body)) => body,
Err(err) => return Err(err),
};
diff --git a/src/index.rs b/src/index.rs
index b9d29a2..4a1b9c9 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -61,8 +61,8 @@ impl Index {
sql::remove(&self.conn, id)
}
- /// Find chunks with a given checksum.
- pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> {
+ /// Find chunks with a client-assigned label.
+ pub fn find_by_label(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> {
sql::find_by_256(&self.conn, sha256)
}
@@ -98,7 +98,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta.clone()).unwrap();
assert_eq!(idx.get_meta(&id).unwrap(), meta);
- let ids = idx.find_by_sha256("abc").unwrap();
+ let ids = idx.find_by_label("abc").unwrap();
assert_eq!(ids, vec![id]);
}
@@ -110,7 +110,7 @@ mod test {
let dir = tempdir().unwrap();
let mut idx = new_index(dir.path());
idx.insert_meta(id, meta).unwrap();
- assert_eq!(idx.find_by_sha256("def").unwrap().len(), 0)
+ assert_eq!(idx.find_by_label("def").unwrap().len(), 0)
}
#[test]
@@ -122,7 +122,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta).unwrap();
idx.remove_meta(&id).unwrap();
- let ids: Vec<ChunkId> = idx.find_by_sha256("abc").unwrap();
+ let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap();
assert_eq!(ids, vec![]);
}
@@ -193,12 +193,12 @@ mod sql {
/// Insert a new chunk's metadata into database.
pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> {
let chunkid = format!("{}", chunkid);
- let sha256 = meta.sha256();
+ let label = meta.label();
let generation = if meta.is_generation() { 1 } else { 0 };
let ended = meta.ended();
t.execute(
"INSERT INTO chunks (id, sha256, generation, ended) VALUES (?1, ?2, ?3, ?4)",
- params![chunkid, sha256, generation, ended],
+ params![chunkid, label, generation, ended],
)?;
Ok(())
}
diff --git a/src/indexedstore.rs b/src/indexedstore.rs
index 49953ee..46f9e72 100644
--- a/src/indexedstore.rs
+++ b/src/indexedstore.rs
@@ -63,9 +63,9 @@ impl IndexedStore {
Ok(self.index.get_meta(id)?)
}
- /// Find chunks with a given checksum.
- pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexedError> {
- Ok(self.index.find_by_sha256(sha256)?)
+ /// Find chunks with a client-assigned label.
+ pub fn find_by_label(&self, label: &str) -> Result<Vec<ChunkId>, IndexedError> {
+ Ok(self.index.find_by_label(label)?)
}
/// Find all generations.
diff --git a/subplot/server.py b/subplot/server.py
index 2a3e397..de63836 100644
--- a/subplot/server.py
+++ b/subplot/server.py
@@ -69,8 +69,8 @@ def get_chunk_by_id(ctx, chunk_id=None):
_request(ctx, requests.get, url)
-def find_chunks_with_sha(ctx, sha=None):
- url = f"{ctx['server_url']}/chunks?sha256={sha}"
+def find_chunks_with_label(ctx, sha=None):
+ url = f"{ctx['server_url']}/chunks?label={sha}"
_request(ctx, requests.get, url)
diff --git a/subplot/server.yaml b/subplot/server.yaml
index faf8f49..7b7d461 100644
--- a/subplot/server.yaml
+++ b/subplot/server.yaml
@@ -31,11 +31,11 @@
python:
function: get_chunk_by_id
-- when: "I GET /chunks?sha256={sha}"
+- when: "I GET /chunks?label={sha}"
regex: false
impl:
python:
- function: find_chunks_with_sha
+ function: find_chunks_with_label
- when: "I DELETE /chunks/<{var}>"
impl: