summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2022-03-14 06:42:06 +0000
committerLars Wirzenius <liw@liw.fi>2022-03-14 06:42:06 +0000
commitc0297627baa7bad226793adba57a2add1da3f837 (patch)
tree4103f4c460d0db422b999b4f19097cd489b2a6ca
parent5e79c4f9b3e5789d1d40c2144ba6c3b6a868dae7 (diff)
parent29e68de7973f2b294c50b7d33ef216a8f095b9f9 (diff)
downloadobnam2-c0297627baa7bad226793adba57a2add1da3f837.tar.gz
Merge branch 'liw/label' into 'main'
feat! rename metadata field "sha256" to "label" See merge request obnam/obnam!220
-rw-r--r--obnam.md37
-rw-r--r--src/bin/obnam-server.rs4
-rw-r--r--src/chunk.rs4
-rw-r--r--src/chunkmeta.rs37
-rw-r--r--src/client.rs2
-rw-r--r--src/index.rs14
-rw-r--r--src/indexedstore.rs6
-rw-r--r--subplot/server.py4
-rw-r--r--subplot/server.yaml4
9 files changed, 59 insertions, 53 deletions
diff --git a/obnam.md b/obnam.md
index 50c293c..8b80eed 100644
--- a/obnam.md
+++ b/obnam.md
@@ -835,12 +835,13 @@ Chunks consist of arbitrary binary data, a small amount of metadata,
and an identifier chosen by the server. The chunk metadata is a JSON
object, consisting of the following fields:
-* `sha256` &mdash; the SHA256 checksum of the chunk contents as
+* `label` &mdash; the SHA256 checksum of the chunk contents as
determined by the client
- this MUST be set for every chunk, including generation chunks
- the server allows for searching based on this field
- note that the server doesn't verify this in any way, to pave way
- for future client-side encryption of the chunk data
+ for future client-side encryption of the chunk data, including the
+ label
* `generation` &mdash; set to `true` if the chunk represents a
generation
- may also be set to `false` or `null` or be missing entirely
@@ -865,7 +866,7 @@ The server has the following API for managing chunks:
server, return its randomly chosen identifier
* `GET /chunks/<ID>` &mdash; retrieve a chunk (and its metadata) from
the server, given a chunk identifier
-* `GET /chunks?sha256=xyzzy` &mdash; find chunks on the server whose
+* `GET /chunks?label=xyzzy` &mdash; find chunks on the server whose
metadata indicates their contents has a given SHA256 checksum
* `GET /chunks?generation=true` &mdash; find generation chunks
* `GET /chunks?data=True` &mdash; find chunks with file data
@@ -903,7 +904,7 @@ metadata are returned in a JSON object:
~~~json
{
"fe20734b-edb3-432f-83c3-d35fe15969dd": {
- "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
+ "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
"generation": null,
"ended: null,
}
@@ -1036,7 +1037,7 @@ storage of backed up data.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
+when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1049,17 +1050,17 @@ We must be able to retrieve it.
when I GET /chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"sha256":"abc","generation":null,"ended":null}
+and chunk-meta is {"label":"abc","generation":null,"ended":null}
and the body matches file data.dat
~~~
We must also be able to find it based on metadata.
~~~scenario
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}}
+and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}}
~~~
Finally, we must be able to delete it. After that, we must not be able
@@ -1072,7 +1073,7 @@ then HTTP status code is 200
when I GET /chunks/<ID>
then HTTP status code is 404
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1095,7 +1096,7 @@ We must get an empty result if searching for chunks that don't exist.
~~~scenario
given a working Obnam system
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
and the JSON body matches {}
@@ -1122,7 +1123,7 @@ First, create a chunk.
~~~scenario
given a working Obnam system
and a file data.dat containing some random data
-when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
+when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
@@ -1138,10 +1139,10 @@ given a running chunk server
Can we still find it by its metadata?
~~~scenario
-when I GET /chunks?sha256=abc
+when I GET /chunks?label=abc
then HTTP status code is 200
and content-type is application/json
-and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}}
+and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}}
~~~
Can we still retrieve it by its identifier?
@@ -1150,7 +1151,7 @@ Can we still retrieve it by its identifier?
when I GET /chunks/<ID>
then HTTP status code is 200
and content-type is application/octet-stream
-and chunk-meta is {"sha256":"abc","generation":null,"ended":null}
+and chunk-meta is {"label":"abc","generation":null,"ended":null}
and the body matches file data.dat
~~~
@@ -1164,14 +1165,14 @@ server more chatty.
~~~scenario
given a working Obnam system
and a file data1.dat containing some random data
-when I POST data1.dat to /chunks, with chunk-meta: {"sha256":"qwerty"}
+when I POST data1.dat to /chunks, with chunk-meta: {"label":"qwerty"}
then the JSON body has a field chunk_id, henceforth ID
and chunk server's stderr doesn't contain "Obnam server starting up"
and chunk server's stderr doesn't contain "created chunk <ID>"
given a running chunk server with environment {"OBNAM_SERVER_LOG": "info"}
and a file data2.dat containing some random data
-when I POST data2.dat to /chunks, with chunk-meta: {"sha256":"xyz"}
+when I POST data2.dat to /chunks, with chunk-meta: {"label":"xyz"}
then the JSON body has a field chunk_id, henceforth ID
and chunk server's stderr contains "Obnam server starting up"
and chunk server's stderr contains "created chunk <ID>"
@@ -1274,8 +1275,8 @@ roots: [live]
given a working Obnam system
given a client config based on smoke.yaml
given a file cleartext.dat containing some random data
-when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"sha256":"fake"}'
-when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"sha256":"fake"}'
+when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"label":"fake"}'
+when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"label":"fake"}'
then files cleartext.dat and encrypted.dat are different
then files cleartext.dat and decrypted.dat are identical
~~~
diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs
index f06b7b5..0b80854 100644
--- a/src/bin/obnam-server.rs
+++ b/src/bin/obnam-server.rs
@@ -155,8 +155,8 @@ pub async fn search_chunks(
}
if key == "generation" && value == "true" {
store.find_generations().expect("SQL lookup failed")
- } else if key == "sha256" {
- store.find_by_sha256(value).expect("SQL lookup failed")
+ } else if key == "label" {
+ store.find_by_label(value).expect("SQL lookup failed")
} else {
error!("unknown search key {:?}", key);
return Ok(ChunkResult::BadRequest);
diff --git a/src/chunk.rs b/src/chunk.rs
index 15e3288..a37aa57 100644
--- a/src/chunk.rs
+++ b/src/chunk.rs
@@ -97,8 +97,8 @@ impl GenerationChunk {
let json: String =
serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?;
let bytes = json.as_bytes().to_vec();
- let sha = Checksum::sha256(&bytes);
- let meta = ChunkMeta::new_generation(&sha, ended);
+ let checksum = Checksum::sha256(&bytes);
+ let meta = ChunkMeta::new_generation(&checksum, ended);
Ok(DataChunk::new(bytes, meta))
}
}
diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs
index 06a187b..9a435fe 100644
--- a/src/chunkmeta.rs
+++ b/src/chunkmeta.rs
@@ -10,7 +10,8 @@ use std::str::FromStr;
/// We manage three bits of metadata about chunks, in addition to its
/// identifier:
///
-/// * for all chunks, a [SHA256][] checksum of the chunk content
+/// * for all chunks, a [SHA256][] checksum of the chunk content; we
+/// expose this to the server as the chunk "label"
///
/// * for generation chunks, an indication that it is a generation
/// chunk, and a timestamp for when making the generation snapshot
@@ -23,7 +24,7 @@ use std::str::FromStr;
///
/// ~~~json
/// {
-/// "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
+/// "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b",
/// "generation": true,
/// "ended": "2020-09-17T08:17:13+03:00"
/// }
@@ -40,7 +41,7 @@ use std::str::FromStr;
/// [SHA256]: https://en.wikipedia.org/wiki/SHA-2
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct ChunkMeta {
- sha256: String,
+ label: String,
// The remaining fields are Options so that JSON parsing doesn't
// insist on them being there in the textual representation.
generation: Option<bool>,
@@ -51,18 +52,18 @@ impl ChunkMeta {
/// Create a new data chunk.
///
/// Data chunks are not for generations.
- pub fn new(sha256: &Checksum) -> Self {
+ pub fn new(checksum: &Checksum) -> Self {
ChunkMeta {
- sha256: sha256.to_string(),
+ label: checksum.to_string(),
generation: None,
ended: None,
}
}
/// Create a new generation chunk.
- pub fn new_generation(sha256: &Checksum, ended: &str) -> Self {
+ pub fn new_generation(checksum: &Checksum, ended: &str) -> Self {
ChunkMeta {
- sha256: sha256.to_string(),
+ label: checksum.to_string(),
generation: Some(true),
ended: Some(ended.to_string()),
}
@@ -78,9 +79,13 @@ impl ChunkMeta {
self.ended.as_deref()
}
- /// SHA256 checksum of the content of the chunk.
- pub fn sha256(&self) -> &str {
- &self.sha256
+ /// The label of the content of the chunk.
+ ///
+ /// The caller should not interpret the label in any way. It
+ /// happens to be a SHA256 of the cleartext contents of the
+ /// chunk for now, but that _will_ change in the future.
+ pub fn label(&self) -> &str {
+ &self.label
}
/// Serialize from a textual JSON representation.
@@ -118,7 +123,7 @@ mod test {
let meta = ChunkMeta::new(&sum);
assert!(!meta.is_generation());
assert_eq!(meta.ended(), None);
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
@@ -127,26 +132,26 @@ mod test {
let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00");
assert!(meta.is_generation());
assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00"));
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
fn data_chunk_from_json() {
- let meta: ChunkMeta = r#"{"sha256": "abcdef"}"#.parse().unwrap();
+ let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap();
assert!(!meta.is_generation());
assert_eq!(meta.ended(), None);
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
fn generation_chunk_from_json() {
let meta: ChunkMeta =
- r#"{"sha256": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"#
+ r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"#
.parse()
.unwrap();
assert!(meta.is_generation());
assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00"));
- assert_eq!(meta.sha256(), "abcdef");
+ assert_eq!(meta.label(), "abcdef");
}
#[test]
diff --git a/src/client.rs b/src/client.rs
index bcc31b4..b58f89c 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -130,7 +130,7 @@ impl BackupClient {
/// Does the server have a chunk?
pub async fn has_chunk(&self, meta: &ChunkMeta) -> Result<Option<ChunkId>, ClientError> {
- let body = match self.get("", &[("sha256", meta.sha256())]).await {
+ let body = match self.get("", &[("label", meta.label())]).await {
Ok((_, body)) => body,
Err(err) => return Err(err),
};
diff --git a/src/index.rs b/src/index.rs
index b9d29a2..4a1b9c9 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -61,8 +61,8 @@ impl Index {
sql::remove(&self.conn, id)
}
- /// Find chunks with a given checksum.
- pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> {
+ /// Find chunks with a client-assigned label.
+ pub fn find_by_label(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> {
sql::find_by_256(&self.conn, sha256)
}
@@ -98,7 +98,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta.clone()).unwrap();
assert_eq!(idx.get_meta(&id).unwrap(), meta);
- let ids = idx.find_by_sha256("abc").unwrap();
+ let ids = idx.find_by_label("abc").unwrap();
assert_eq!(ids, vec![id]);
}
@@ -110,7 +110,7 @@ mod test {
let dir = tempdir().unwrap();
let mut idx = new_index(dir.path());
idx.insert_meta(id, meta).unwrap();
- assert_eq!(idx.find_by_sha256("def").unwrap().len(), 0)
+ assert_eq!(idx.find_by_label("def").unwrap().len(), 0)
}
#[test]
@@ -122,7 +122,7 @@ mod test {
let mut idx = new_index(dir.path());
idx.insert_meta(id.clone(), meta).unwrap();
idx.remove_meta(&id).unwrap();
- let ids: Vec<ChunkId> = idx.find_by_sha256("abc").unwrap();
+ let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap();
assert_eq!(ids, vec![]);
}
@@ -193,12 +193,12 @@ mod sql {
/// Insert a new chunk's metadata into database.
pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> {
let chunkid = format!("{}", chunkid);
- let sha256 = meta.sha256();
+ let label = meta.label();
let generation = if meta.is_generation() { 1 } else { 0 };
let ended = meta.ended();
t.execute(
"INSERT INTO chunks (id, sha256, generation, ended) VALUES (?1, ?2, ?3, ?4)",
- params![chunkid, sha256, generation, ended],
+ params![chunkid, label, generation, ended],
)?;
Ok(())
}
diff --git a/src/indexedstore.rs b/src/indexedstore.rs
index 49953ee..46f9e72 100644
--- a/src/indexedstore.rs
+++ b/src/indexedstore.rs
@@ -63,9 +63,9 @@ impl IndexedStore {
Ok(self.index.get_meta(id)?)
}
- /// Find chunks with a given checksum.
- pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexedError> {
- Ok(self.index.find_by_sha256(sha256)?)
+ /// Find chunks with a client-assigned label.
+ pub fn find_by_label(&self, label: &str) -> Result<Vec<ChunkId>, IndexedError> {
+ Ok(self.index.find_by_label(label)?)
}
/// Find all generations.
diff --git a/subplot/server.py b/subplot/server.py
index 2a3e397..de63836 100644
--- a/subplot/server.py
+++ b/subplot/server.py
@@ -69,8 +69,8 @@ def get_chunk_by_id(ctx, chunk_id=None):
_request(ctx, requests.get, url)
-def find_chunks_with_sha(ctx, sha=None):
- url = f"{ctx['server_url']}/chunks?sha256={sha}"
+def find_chunks_with_label(ctx, sha=None):
+ url = f"{ctx['server_url']}/chunks?label={sha}"
_request(ctx, requests.get, url)
diff --git a/subplot/server.yaml b/subplot/server.yaml
index faf8f49..7b7d461 100644
--- a/subplot/server.yaml
+++ b/subplot/server.yaml
@@ -31,11 +31,11 @@
python:
function: get_chunk_by_id
-- when: "I GET /chunks?sha256={sha}"
+- when: "I GET /chunks?label={sha}"
regex: false
impl:
python:
- function: find_chunks_with_sha
+ function: find_chunks_with_label
- when: "I DELETE /chunks/<{var}>"
impl: