From 29e68de7973f2b294c50b7d33ef216a8f095b9f9 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 12 Mar 2022 07:52:05 +0200 Subject: feat! rename metadata field "sha256" to "label" The field still contains a cleartext SHA256 of the cleartext chunk data, but this makes it clearer that it may contain other data. This is a breaking change: the server API won't work with an old client, and the new client won't work with an old server. To avoid the breakage would require more effort than is warranted at this time, given the very small number of users of Obnam. Sorry. Sponsored-by: author --- obnam.md | 37 +++++++++++++++++++------------------ src/bin/obnam-server.rs | 4 ++-- src/chunk.rs | 4 ++-- src/chunkmeta.rs | 37 +++++++++++++++++++++---------------- src/client.rs | 2 +- src/index.rs | 14 +++++++------- src/indexedstore.rs | 6 +++--- subplot/server.py | 4 ++-- subplot/server.yaml | 4 ++-- 9 files changed, 59 insertions(+), 53 deletions(-) diff --git a/obnam.md b/obnam.md index 50c293c..8b80eed 100644 --- a/obnam.md +++ b/obnam.md @@ -835,12 +835,13 @@ Chunks consist of arbitrary binary data, a small amount of metadata, and an identifier chosen by the server. 
The chunk metadata is a JSON object, consisting of the following fields: -* `sha256` — the SHA256 checksum of the chunk contents as +* `label` — the SHA256 checksum of the chunk contents as determined by the client - this MUST be set for every chunk, including generation chunks - the server allows for searching based on this field - note that the server doesn't verify this in any way, to pave way - for future client-side encryption of the chunk data + for future client-side encryption of the chunk data, including the + label * `generation` — set to `true` if the chunk represents a generation - may also be set to `false` or `null` or be missing entirely @@ -865,7 +866,7 @@ The server has the following API for managing chunks: server, return its randomly chosen identifier * `GET /chunks/` — retrieve a chunk (and its metadata) from the server, given a chunk identifier -* `GET /chunks?sha256=xyzzy` — find chunks on the server whose +* `GET /chunks?label=xyzzy` — find chunks on the server whose metadata indicates their contents has a given SHA256 checksum * `GET /chunks?generation=true` — find generation chunks * `GET /chunks?data=True` — find chunks with file data @@ -903,7 +904,7 @@ metadata are returned in a JSON object: ~~~json { "fe20734b-edb3-432f-83c3-d35fe15969dd": { - "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", + "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", "generation": null, "ended: null, } @@ -1036,7 +1037,7 @@ storage of backed up data. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"} +when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1049,17 +1050,17 @@ We must be able to retrieve it. 
when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"sha256":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc","generation":null,"ended":null} and the body matches file data.dat ~~~ We must also be able to find it based on metadata. ~~~scenario -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"sha256":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} ~~~ Finally, we must be able to delete it. After that, we must not be able @@ -1072,7 +1073,7 @@ then HTTP status code is 200 when I GET /chunks/ then HTTP status code is 404 -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1095,7 +1096,7 @@ We must get an empty result if searching for chunks that don't exist. ~~~scenario given a working Obnam system -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1122,7 +1123,7 @@ First, create a chunk. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"} +when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1138,10 +1139,10 @@ given a running chunk server Can we still find it by its metadata? 
~~~scenario -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"sha256":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} ~~~ Can we still retrieve it by its identifier? @@ -1150,7 +1151,7 @@ Can we still retrieve it by its identifier? when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"sha256":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc","generation":null,"ended":null} and the body matches file data.dat ~~~ @@ -1164,14 +1165,14 @@ server more chatty. ~~~scenario given a working Obnam system and a file data1.dat containing some random data -when I POST data1.dat to /chunks, with chunk-meta: {"sha256":"qwerty"} +when I POST data1.dat to /chunks, with chunk-meta: {"label":"qwerty"} then the JSON body has a field chunk_id, henceforth ID and chunk server's stderr doesn't contain "Obnam server starting up" and chunk server's stderr doesn't contain "created chunk " given a running chunk server with environment {"OBNAM_SERVER_LOG": "info"} and a file data2.dat containing some random data -when I POST data2.dat to /chunks, with chunk-meta: {"sha256":"xyz"} +when I POST data2.dat to /chunks, with chunk-meta: {"label":"xyz"} then the JSON body has a field chunk_id, henceforth ID and chunk server's stderr contains "Obnam server starting up" and chunk server's stderr contains "created chunk " @@ -1274,8 +1275,8 @@ roots: [live] given a working Obnam system given a client config based on smoke.yaml given a file cleartext.dat containing some random data -when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"sha256":"fake"}' -when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"sha256":"fake"}' +when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"label":"fake"}' +when I run obnam 
decrypt-chunk encrypted.dat decrypted.dat '{"label":"fake"}' then files cleartext.dat and encrypted.dat are different then files cleartext.dat and decrypted.dat are identical ~~~ diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index f06b7b5..0b80854 100644 --- a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -155,8 +155,8 @@ pub async fn search_chunks( } if key == "generation" && value == "true" { store.find_generations().expect("SQL lookup failed") - } else if key == "sha256" { - store.find_by_sha256(value).expect("SQL lookup failed") + } else if key == "label" { + store.find_by_label(value).expect("SQL lookup failed") } else { error!("unknown search key {:?}", key); return Ok(ChunkResult::BadRequest); diff --git a/src/chunk.rs b/src/chunk.rs index 15e3288..a37aa57 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -97,8 +97,8 @@ impl GenerationChunk { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); - let sha = Checksum::sha256(&bytes); - let meta = ChunkMeta::new_generation(&sha, ended); + let checksum = Checksum::sha256(&bytes); + let meta = ChunkMeta::new_generation(&checksum, ended); Ok(DataChunk::new(bytes, meta)) } } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 06a187b..9a435fe 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -10,7 +10,8 @@ use std::str::FromStr; /// We manage three bits of metadata about chunks, in addition to its /// identifier: /// -/// * for all chunks, a [SHA256][] checksum of the chunk content +/// * for all chunks, a [SHA256][] checksum of the chunk content; we +/// expose this to the server as the chunk "label" /// /// * for generation chunks, an indication that it is a generation /// chunk, and a timestamp for when making the generation snapshot @@ -23,7 +24,7 @@ use std::str::FromStr; /// /// ~~~json /// { -/// "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", +/// "label": 
"09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", /// "generation": true, /// "ended": "2020-09-17T08:17:13+03:00" /// } @@ -40,7 +41,7 @@ use std::str::FromStr; /// [SHA256]: https://en.wikipedia.org/wiki/SHA-2 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct ChunkMeta { - sha256: String, + label: String, // The remaining fields are Options so that JSON parsing doesn't // insist on them being there in the textual representation. generation: Option, @@ -51,18 +52,18 @@ impl ChunkMeta { /// Create a new data chunk. /// /// Data chunks are not for generations. - pub fn new(sha256: &Checksum) -> Self { + pub fn new(checksum: &Checksum) -> Self { ChunkMeta { - sha256: sha256.to_string(), + label: checksum.to_string(), generation: None, ended: None, } } /// Create a new generation chunk. - pub fn new_generation(sha256: &Checksum, ended: &str) -> Self { + pub fn new_generation(checksum: &Checksum, ended: &str) -> Self { ChunkMeta { - sha256: sha256.to_string(), + label: checksum.to_string(), generation: Some(true), ended: Some(ended.to_string()), } @@ -78,9 +79,13 @@ impl ChunkMeta { self.ended.as_deref() } - /// SHA256 checksum of the content of the chunk. - pub fn sha256(&self) -> &str { - &self.sha256 + /// The label of the content of the chunk. + /// + /// The caller should not interpret the label in any way. It + /// happens to be a SHA256 of the cleartext contents of the + /// chunk for now, but that _will_ change in the future. + pub fn label(&self) -> &str { + &self.label } /// Serialize from a textual JSON representation. 
@@ -118,7 +123,7 @@ mod test { let meta = ChunkMeta::new(&sum); assert!(!meta.is_generation()); assert_eq!(meta.ended(), None); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] @@ -127,26 +132,26 @@ mod test { let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); assert!(meta.is_generation()); assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] fn data_chunk_from_json() { - let meta: ChunkMeta = r#"{"sha256": "abcdef"}"#.parse().unwrap(); + let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap(); assert!(!meta.is_generation()); assert_eq!(meta.ended(), None); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] fn generation_chunk_from_json() { let meta: ChunkMeta = - r#"{"sha256": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# + r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# .parse() .unwrap(); assert!(meta.is_generation()); assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] diff --git a/src/client.rs b/src/client.rs index bcc31b4..b58f89c 100644 --- a/src/client.rs +++ b/src/client.rs @@ -130,7 +130,7 @@ impl BackupClient { /// Does the server have a chunk? pub async fn has_chunk(&self, meta: &ChunkMeta) -> Result, ClientError> { - let body = match self.get("", &[("sha256", meta.sha256())]).await { + let body = match self.get("", &[("label", meta.label())]).await { Ok((_, body)) => body, Err(err) => return Err(err), }; diff --git a/src/index.rs b/src/index.rs index b9d29a2..4a1b9c9 100644 --- a/src/index.rs +++ b/src/index.rs @@ -61,8 +61,8 @@ impl Index { sql::remove(&self.conn, id) } - /// Find chunks with a given checksum. 
- pub fn find_by_sha256(&self, sha256: &str) -> Result, IndexError> { + /// Find chunks with a client-assigned label. + pub fn find_by_label(&self, sha256: &str) -> Result, IndexError> { sql::find_by_256(&self.conn, sha256) } @@ -98,7 +98,7 @@ mod test { let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta.clone()).unwrap(); assert_eq!(idx.get_meta(&id).unwrap(), meta); - let ids = idx.find_by_sha256("abc").unwrap(); + let ids = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![id]); } @@ -110,7 +110,7 @@ mod test { let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id, meta).unwrap(); - assert_eq!(idx.find_by_sha256("def").unwrap().len(), 0) + assert_eq!(idx.find_by_label("def").unwrap().len(), 0) } #[test] @@ -122,7 +122,7 @@ mod test { let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); idx.remove_meta(&id).unwrap(); - let ids: Vec = idx.find_by_sha256("abc").unwrap(); + let ids: Vec = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![]); } @@ -193,12 +193,12 @@ mod sql { /// Insert a new chunk's metadata into database. pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> { let chunkid = format!("{}", chunkid); - let sha256 = meta.sha256(); + let label = meta.label(); let generation = if meta.is_generation() { 1 } else { 0 }; let ended = meta.ended(); t.execute( "INSERT INTO chunks (id, sha256, generation, ended) VALUES (?1, ?2, ?3, ?4)", - params![chunkid, sha256, generation, ended], + params![chunkid, label, generation, ended], )?; Ok(()) } diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 49953ee..46f9e72 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -63,9 +63,9 @@ impl IndexedStore { Ok(self.index.get_meta(id)?) } - /// Find chunks with a given checksum. - pub fn find_by_sha256(&self, sha256: &str) -> Result, IndexedError> { - Ok(self.index.find_by_sha256(sha256)?) 
+ /// Find chunks with a client-assigned label. + pub fn find_by_label(&self, label: &str) -> Result, IndexedError> { + Ok(self.index.find_by_label(label)?) } /// Find all generations. diff --git a/subplot/server.py b/subplot/server.py index 2a3e397..de63836 100644 --- a/subplot/server.py +++ b/subplot/server.py @@ -69,8 +69,8 @@ def get_chunk_by_id(ctx, chunk_id=None): _request(ctx, requests.get, url) -def find_chunks_with_sha(ctx, sha=None): - url = f"{ctx['server_url']}/chunks?sha256={sha}" +def find_chunks_with_label(ctx, sha=None): + url = f"{ctx['server_url']}/chunks?label={sha}" _request(ctx, requests.get, url) diff --git a/subplot/server.yaml b/subplot/server.yaml index faf8f49..7b7d461 100644 --- a/subplot/server.yaml +++ b/subplot/server.yaml @@ -31,11 +31,11 @@ python: function: get_chunk_by_id -- when: "I GET /chunks?sha256={sha}" +- when: "I GET /chunks?label={sha}" regex: false impl: python: - function: find_chunks_with_sha + function: find_chunks_with_label - when: "I DELETE /chunks/<{var}>" impl: -- cgit v1.2.1