diff options
-rw-r--r-- | obnam.md | 37 | ||||
-rw-r--r-- | src/bin/obnam-server.rs | 4 | ||||
-rw-r--r-- | src/chunk.rs | 4 | ||||
-rw-r--r-- | src/chunkmeta.rs | 37 | ||||
-rw-r--r-- | src/client.rs | 2 | ||||
-rw-r--r-- | src/index.rs | 14 | ||||
-rw-r--r-- | src/indexedstore.rs | 6 | ||||
-rw-r--r-- | subplot/server.py | 4 | ||||
-rw-r--r-- | subplot/server.yaml | 4 |
9 files changed, 59 insertions, 53 deletions
@@ -835,12 +835,13 @@ Chunks consist of arbitrary binary data, a small amount of metadata, and an identifier chosen by the server. The chunk metadata is a JSON object, consisting of the following fields: -* `sha256` — the SHA256 checksum of the chunk contents as +* `label` — the SHA256 checksum of the chunk contents as determined by the client - this MUST be set for every chunk, including generation chunks - the server allows for searching based on this field - note that the server doesn't verify this in any way, to pave way - for future client-side encryption of the chunk data + for future client-side encryption of the chunk data, including the + label * `generation` — set to `true` if the chunk represents a generation - may also be set to `false` or `null` or be missing entirely @@ -865,7 +866,7 @@ The server has the following API for managing chunks: server, return its randomly chosen identifier * `GET /chunks/<ID>` — retrieve a chunk (and its metadata) from the server, given a chunk identifier -* `GET /chunks?sha256=xyzzy` — find chunks on the server whose +* `GET /chunks?label=xyzzy` — find chunks on the server whose metadata indicates their contents has a given SHA256 checksum * `GET /chunks?generation=true` — find generation chunks * `GET /chunks?data=True` — find chunks with file data @@ -903,7 +904,7 @@ metadata are returned in a JSON object: ~~~json { "fe20734b-edb3-432f-83c3-d35fe15969dd": { - "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", + "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", "generation": null, "ended: null, } @@ -1036,7 +1037,7 @@ storage of backed up data. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"} +when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1049,17 +1050,17 @@ We must be able to retrieve it. when I GET /chunks/<ID> then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"sha256":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc","generation":null,"ended":null} and the body matches file data.dat ~~~ We must also be able to find it based on metadata. ~~~scenario -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}} +and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}} ~~~ Finally, we must be able to delete it. After that, we must not be able @@ -1072,7 +1073,7 @@ then HTTP status code is 200 when I GET /chunks/<ID> then HTTP status code is 404 -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1095,7 +1096,7 @@ We must get an empty result if searching for chunks that don't exist. ~~~scenario given a working Obnam system -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json and the JSON body matches {} @@ -1122,7 +1123,7 @@ First, create a chunk. ~~~scenario given a working Obnam system and a file data.dat containing some random data -when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"} +when I POST data.dat to /chunks, with chunk-meta: {"label":"abc"} then HTTP status code is 201 and content-type is application/json and the JSON body has a field chunk_id, henceforth ID @@ -1138,10 +1139,10 @@ given a running chunk server Can we still find it by its metadata? ~~~scenario -when I GET /chunks?sha256=abc +when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}} +and the JSON body matches {"<ID>":{"label":"abc","generation":null,"ended":null}} ~~~ Can we still retrieve it by its identifier? @@ -1150,7 +1151,7 @@ Can we still retrieve it by its identifier? when I GET /chunks/<ID> then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"sha256":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc","generation":null,"ended":null} and the body matches file data.dat ~~~ @@ -1164,14 +1165,14 @@ server more chatty. ~~~scenario given a working Obnam system and a file data1.dat containing some random data -when I POST data1.dat to /chunks, with chunk-meta: {"sha256":"qwerty"} +when I POST data1.dat to /chunks, with chunk-meta: {"label":"qwerty"} then the JSON body has a field chunk_id, henceforth ID and chunk server's stderr doesn't contain "Obnam server starting up" and chunk server's stderr doesn't contain "created chunk <ID>" given a running chunk server with environment {"OBNAM_SERVER_LOG": "info"} and a file data2.dat containing some random data -when I POST data2.dat to /chunks, with chunk-meta: {"sha256":"xyz"} +when I POST data2.dat to /chunks, with chunk-meta: {"label":"xyz"} then the JSON body has a field chunk_id, henceforth ID and chunk server's stderr contains "Obnam server starting up" and chunk server's stderr contains "created chunk <ID>" @@ -1274,8 +1275,8 @@ roots: [live] given a working Obnam system given a client config based on smoke.yaml given a file cleartext.dat containing some random data -when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"sha256":"fake"}' -when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"sha256":"fake"}' +when I run obnam encrypt-chunk cleartext.dat encrypted.dat '{"label":"fake"}' +when I run obnam decrypt-chunk encrypted.dat decrypted.dat '{"label":"fake"}' then files cleartext.dat and encrypted.dat are different then files cleartext.dat and decrypted.dat are identical ~~~ diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index f06b7b5..0b80854 100644 --- a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -155,8 +155,8 @@ pub async fn search_chunks( } if key == "generation" && value == "true" { store.find_generations().expect("SQL lookup failed") - } else if key == "sha256" { - store.find_by_sha256(value).expect("SQL lookup failed") + } else if key == "label" { + store.find_by_label(value).expect("SQL lookup failed") } else { error!("unknown search key {:?}", key); return Ok(ChunkResult::BadRequest); diff --git a/src/chunk.rs b/src/chunk.rs index 15e3288..a37aa57 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -97,8 +97,8 @@ impl GenerationChunk { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); - let sha = Checksum::sha256(&bytes); - let meta = ChunkMeta::new_generation(&sha, ended); + let checksum = Checksum::sha256(&bytes); + let meta = ChunkMeta::new_generation(&checksum, ended); Ok(DataChunk::new(bytes, meta)) } } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 06a187b..9a435fe 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -10,7 +10,8 @@ use std::str::FromStr; /// We manage three bits of metadata about chunks, in addition to its /// identifier: /// -/// * for all chunks, a [SHA256][] checksum of the chunk content +/// * for all chunks, a [SHA256][] checksum of the chunk content; we +/// expose this to the server as the chunk "label" /// /// * for generation chunks, an indication that it is a generation /// chunk, and a timestamp for when making the generation snapshot @@ -23,7 +24,7 @@ use std::str::FromStr; /// /// ~~~json /// { -/// "sha256": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", +/// "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", /// "generation": true, /// "ended": "2020-09-17T08:17:13+03:00" /// } @@ -40,7 +41,7 @@ use std::str::FromStr; /// [SHA256]: https://en.wikipedia.org/wiki/SHA-2 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct ChunkMeta { - sha256: String, + label: String, // The remaining fields are Options so that JSON parsing doesn't // insist on them being there in the textual representation. generation: Option<bool>, @@ -51,18 +52,18 @@ impl ChunkMeta { /// Create a new data chunk. /// /// Data chunks are not for generations. - pub fn new(sha256: &Checksum) -> Self { + pub fn new(checksum: &Checksum) -> Self { ChunkMeta { - sha256: sha256.to_string(), + label: checksum.to_string(), generation: None, ended: None, } } /// Create a new generation chunk. - pub fn new_generation(sha256: &Checksum, ended: &str) -> Self { + pub fn new_generation(checksum: &Checksum, ended: &str) -> Self { ChunkMeta { - sha256: sha256.to_string(), + label: checksum.to_string(), generation: Some(true), ended: Some(ended.to_string()), } @@ -78,9 +79,13 @@ impl ChunkMeta { self.ended.as_deref() } - /// SHA256 checksum of the content of the chunk. - pub fn sha256(&self) -> &str { - &self.sha256 + /// The label of the content of the chunk. + /// + /// The caller should not interpret the label in any way. It + /// happens to be a SHA256 of the cleartext contents of the + /// checksum for now, but that _will_ change in the future. + pub fn label(&self) -> &str { + &self.label } /// Serialize from a textual JSON representation. @@ -118,7 +123,7 @@ mod test { let meta = ChunkMeta::new(&sum); assert!(!meta.is_generation()); assert_eq!(meta.ended(), None); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] @@ -127,26 +132,26 @@ mod test { let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); assert!(meta.is_generation()); assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] fn data_chunk_from_json() { - let meta: ChunkMeta = r#"{"sha256": "abcdef"}"#.parse().unwrap(); + let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap(); assert!(!meta.is_generation()); assert_eq!(meta.ended(), None); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] fn generation_chunk_from_json() { let meta: ChunkMeta = - r#"{"sha256": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# + r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# .parse() .unwrap(); assert!(meta.is_generation()); assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); - assert_eq!(meta.sha256(), "abcdef"); + assert_eq!(meta.label(), "abcdef"); } #[test] diff --git a/src/client.rs b/src/client.rs index bcc31b4..b58f89c 100644 --- a/src/client.rs +++ b/src/client.rs @@ -130,7 +130,7 @@ impl BackupClient { /// Does the server have a chunk? pub async fn has_chunk(&self, meta: &ChunkMeta) -> Result<Option<ChunkId>, ClientError> { - let body = match self.get("", &[("sha256", meta.sha256())]).await { + let body = match self.get("", &[("label", meta.label())]).await { Ok((_, body)) => body, Err(err) => return Err(err), }; diff --git a/src/index.rs b/src/index.rs index b9d29a2..4a1b9c9 100644 --- a/src/index.rs +++ b/src/index.rs @@ -61,8 +61,8 @@ impl Index { sql::remove(&self.conn, id) } - /// Find chunks with a given checksum. - pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> { + /// Find chunks with a client-assigned label. + pub fn find_by_label(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexError> { sql::find_by_256(&self.conn, sha256) } @@ -98,7 +98,7 @@ mod test { let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta.clone()).unwrap(); assert_eq!(idx.get_meta(&id).unwrap(), meta); - let ids = idx.find_by_sha256("abc").unwrap(); + let ids = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![id]); } @@ -110,7 +110,7 @@ mod test { let dir = tempdir().unwrap(); let mut idx = new_index(dir.path()); idx.insert_meta(id, meta).unwrap(); - assert_eq!(idx.find_by_sha256("def").unwrap().len(), 0) + assert_eq!(idx.find_by_label("def").unwrap().len(), 0) } #[test] @@ -122,7 +122,7 @@ mod test { let mut idx = new_index(dir.path()); idx.insert_meta(id.clone(), meta).unwrap(); idx.remove_meta(&id).unwrap(); - let ids: Vec<ChunkId> = idx.find_by_sha256("abc").unwrap(); + let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![]); } @@ -193,12 +193,12 @@ mod sql { /// Insert a new chunk's metadata into database. pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> { let chunkid = format!("{}", chunkid); - let sha256 = meta.sha256(); + let label = meta.label(); let generation = if meta.is_generation() { 1 } else { 0 }; let ended = meta.ended(); t.execute( "INSERT INTO chunks (id, sha256, generation, ended) VALUES (?1, ?2, ?3, ?4)", - params![chunkid, sha256, generation, ended], + params![chunkid, label, generation, ended], )?; Ok(()) } diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 49953ee..46f9e72 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -63,9 +63,9 @@ impl IndexedStore { Ok(self.index.get_meta(id)?) } - /// Find chunks with a given checksum. - pub fn find_by_sha256(&self, sha256: &str) -> Result<Vec<ChunkId>, IndexedError> { - Ok(self.index.find_by_sha256(sha256)?) + /// Find chunks with a client-assigned label. + pub fn find_by_label(&self, label: &str) -> Result<Vec<ChunkId>, IndexedError> { + Ok(self.index.find_by_label(label)?) } /// Find all generations. diff --git a/subplot/server.py b/subplot/server.py index 2a3e397..de63836 100644 --- a/subplot/server.py +++ b/subplot/server.py @@ -69,8 +69,8 @@ def get_chunk_by_id(ctx, chunk_id=None): _request(ctx, requests.get, url) -def find_chunks_with_sha(ctx, sha=None): - url = f"{ctx['server_url']}/chunks?sha256={sha}" +def find_chunks_with_label(ctx, sha=None): + url = f"{ctx['server_url']}/chunks?label={sha}" _request(ctx, requests.get, url) diff --git a/subplot/server.yaml b/subplot/server.yaml index faf8f49..7b7d461 100644 --- a/subplot/server.yaml +++ b/subplot/server.yaml @@ -31,11 +31,11 @@ python: function: get_chunk_by_id -- when: "I GET /chunks?sha256={sha}" +- when: "I GET /chunks?label={sha}" regex: false impl: python: - function: find_chunks_with_sha + function: find_chunks_with_label - when: "I DELETE /chunks/<{var}>" impl: |