From 7e49a52cee01a26b91253abb38dcc40b5ae727e2 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 21 Mar 2022 17:15:51 +0200 Subject: feat! drop generation related fields from chunk metadata Sponsored-by: author --- obnam.md | 8 +++--- src/backup_run.rs | 2 +- src/bin/obnam-server.rs | 4 +-- src/chunk.rs | 6 ++--- src/chunkmeta.rs | 55 +++++-------------------------------- src/index.rs | 72 +++++-------------------------------------------- src/indexedstore.rs | 5 ---- 7 files changed, 21 insertions(+), 131 deletions(-) diff --git a/obnam.md b/obnam.md index da641a5..68aa01e 100644 --- a/obnam.md +++ b/obnam.md @@ -1112,7 +1112,7 @@ We must be able to retrieve it. when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc"} and the body matches file data.dat ~~~ @@ -1122,7 +1122,7 @@ We must also be able to find it based on metadata. when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc"}} ~~~ Finally, we must be able to delete it. After that, we must not be able @@ -1204,7 +1204,7 @@ Can we still find it by its metadata? when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc"}} ~~~ Can we still retrieve it by its identifier? @@ -1213,7 +1213,7 @@ Can we still retrieve it by its identifier? 
when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc"} and the body matches file data.dat ~~~ diff --git a/src/backup_run.rs b/src/backup_run.rs index 5ceaed3..21140ba 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -356,7 +356,7 @@ impl<'a> BackupRun<'a> { info!("upload SQLite {}", filename.display()); let ids = self.upload_regular_file(filename, size).await?; let gen = GenerationChunk::new(ids); - let data = gen.to_data_chunk(&current_timestamp())?; + let data = gen.to_data_chunk()?; let gen_id = self.client.upload_chunk(data).await?; info!("uploaded generation {}", gen_id); Ok(gen_id) diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index 0b80854..5be2cee 100644 --- a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -153,9 +153,7 @@ pub async fn search_chunks( error!("search has more than one key to search for"); return Ok(ChunkResult::BadRequest); } - if key == "generation" && value == "true" { - store.find_generations().expect("SQL lookup failed") - } else if key == "label" { + if key == "label" { store.find_by_label(value).expect("SQL lookup failed") } else { error!("unknown search key {:?}", key); diff --git a/src/chunk.rs b/src/chunk.rs index 238bd01..27a3ab9 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -93,12 +93,12 @@ impl GenerationChunk { } /// Convert generation chunk to a data chunk. 
- pub fn to_data_chunk(&self, ended: &str) -> Result<DataChunk, GenerationChunkError> { + pub fn to_data_chunk(&self) -> Result<DataChunk, GenerationChunkError> { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); let checksum = Checksum::sha256(&bytes); - let meta = ChunkMeta::new_generation(&checksum, ended); + let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } } @@ -186,7 +186,7 @@ impl ClientTrust { let json: String = serde_json::to_string(self).map_err(ClientTrustError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); let checksum = Checksum::sha256_from_str_unchecked("client-trust"); - let meta = ChunkMeta::new_generation(&checksum, ""); + let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 9a435fe..33c1070 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -7,26 +7,15 @@ use std::str::FromStr; /// Metadata about chunks. /// -/// We manage three bits of metadata about chunks, in addition to its -/// identifier: -/// -/// * for all chunks, a [SHA256][] checksum of the chunk content; we -/// expose this to the server as the chunk "label" -/// -/// * for generation chunks, an indication that it is a generation -/// chunk, and a timestamp for when making the generation snapshot -/// ended -/// -/// There is no syntax or semantics imposed on the timestamp, but a -/// client should probably use [ISO 8601][] representation. +/// We manage a single piece of metadata about chunks, in addition to its +/// identifier: a label assigned by the client. Currently, this is a +/// [SHA256][] checksum of the chunk content. 
/// /// For HTTP, the metadata will be serialised as a JSON object, like this: /// /// ~~~json /// { /// "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", -/// "generation": true, -/// "ended": "2020-09-17T08:17:13+03:00" /// } /// ~~~ /// @@ -42,10 +31,6 @@ use std::str::FromStr; #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct ChunkMeta { label: String, - // The remaining fields are Options so that JSON parsing doesn't - // insist on them being there in the textual representation. - generation: Option<bool>, - ended: Option<String>, } impl ChunkMeta { @@ -55,30 +40,9 @@ impl ChunkMeta { pub fn new(checksum: &Checksum) -> Self { ChunkMeta { label: checksum.to_string(), - generation: None, - ended: None, - } - } - - /// Create a new generation chunk. - pub fn new_generation(checksum: &Checksum, ended: &str) -> Self { - ChunkMeta { - label: checksum.to_string(), - generation: Some(true), - ended: Some(ended.to_string()), } } - /// Is this a generation chunk? - pub fn is_generation(&self) -> bool { - matches!(self.generation, Some(true)) - } - - /// When did this generation end? - pub fn ended(&self) -> Option<&str> { - self.ended.as_deref() - } - /// The label of the content of the chunk. /// /// The caller should not interpret the label in any way. 
It @@ -121,25 +85,19 @@ mod test { fn new_creates_data_chunk() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); let meta = ChunkMeta::new(&sum); - assert!(!meta.is_generation()); - assert_eq!(meta.ended(), None); assert_eq!(meta.label(), "abcdef"); } #[test] fn new_generation_creates_generation_chunk() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); - let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); - assert!(meta.is_generation()); - assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); + let meta = ChunkMeta::new(&sum); assert_eq!(meta.label(), "abcdef"); } #[test] fn data_chunk_from_json() { let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap(); - assert!(!meta.is_generation()); - assert_eq!(meta.ended(), None); assert_eq!(meta.label(), "abcdef"); } @@ -149,15 +107,14 @@ mod test { r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# .parse() .unwrap(); - assert!(meta.is_generation()); - assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); + assert_eq!(meta.label(), "abcdef"); } #[test] fn generation_json_roundtrip() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); - let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); + let meta = ChunkMeta::new(&sum); let json = serde_json::to_string(&meta).unwrap(); let meta2 = serde_json::from_str(&json).unwrap(); assert_eq!(meta, meta2); diff --git a/src/index.rs b/src/index.rs index a3d95fc..11f3480 100644 --- a/src/index.rs +++ b/src/index.rs @@ -9,7 +9,7 @@ use std::path::Path; /// A chunk index stored on the disk. /// /// A chunk index lets the server quickly find chunks based on a -/// string key/value pair, or whether they are generations. +/// string key/value pair. #[derive(Debug)] pub struct Index { conn: Connection, @@ -66,11 +66,6 @@ impl Index { sql::find_by_label(&self.conn, label) } - /// Find all backup generations. 
- pub fn find_generations(&self) -> Result<Vec<ChunkId>, IndexError> { - sql::find_generations(&self.conn) - } - /// Find all chunks. pub fn all_chunks(&self) -> Result<Vec<ChunkId>, IndexError> { sql::find_chunk_ids(&self.conn) @@ -125,36 +120,6 @@ mod test { let ids: Vec<ChunkId> = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![]); } - - #[test] - fn has_no_generations_initially() { - let dir = tempdir().unwrap(); - let idx = new_index(dir.path()); - assert_eq!(idx.find_generations().unwrap(), vec![]); - } - - #[test] - fn remembers_generation() { - let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); - let meta = ChunkMeta::new_generation(&sum, "timestamp"); - let dir = tempdir().unwrap(); - let mut idx = new_index(dir.path()); - idx.insert_meta(id.clone(), meta).unwrap(); - assert_eq!(idx.find_generations().unwrap(), vec![id]); - } - - #[test] - fn removes_generation() { - let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); - let meta = ChunkMeta::new_generation(&sum, "timestamp"); - let dir = tempdir().unwrap(); - let mut idx = new_index(dir.path()); - idx.insert_meta(id.clone(), meta).unwrap(); - idx.remove_meta(&id).unwrap(); - assert_eq!(idx.find_generations().unwrap(), vec![]); - } } mod sql { @@ -170,14 +135,10 @@ mod sql { let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE; let conn = Connection::open_with_flags(filename, flags)?; conn.execute( - "CREATE TABLE chunks (id TEXT PRIMARY KEY, label TEXT, generation INT, ended TEXT)", + "CREATE TABLE chunks (id TEXT PRIMARY KEY, label TEXT)", params![], )?; conn.execute("CREATE INDEX label_idx ON chunks (label)", params![])?; - conn.execute( - "CREATE INDEX generation_idx ON chunks (generation)", - params![], - )?; conn.pragma_update(None, "journal_mode", &"WAL")?; Ok(conn) } @@ -194,11 +155,9 @@ mod sql { pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> { let chunkid = 
format!("{}", chunkid); let label = meta.label(); - let generation = if meta.is_generation() { 1 } else { 0 }; - let ended = meta.ended(); t.execute( - "INSERT INTO chunks (id, label, generation, ended) VALUES (?1, ?2, ?3, ?4)", - params![chunkid, label, generation, ended], + "INSERT INTO chunks (id, label) VALUES (?1, ?2)", + params![chunkid, label], )?; Ok(()) } @@ -243,21 +202,9 @@ mod sql { Ok(ids) } - /// Find all generations. - pub fn find_generations(conn: &Connection) -> Result<Vec<ChunkId>, IndexError> { - let mut stmt = conn.prepare("SELECT id FROM chunks WHERE generation IS 1")?; - let iter = stmt.query_map(params![], row_to_id)?; - let mut ids = vec![]; - for x in iter { - let x = x?; - ids.push(x); - } - Ok(ids) - } - /// Find ids of all chunks. pub fn find_chunk_ids(conn: &Connection) -> Result<Vec<ChunkId>, IndexError> { - let mut stmt = conn.prepare("SELECT id FROM chunks WHERE generation IS 0")?; + let mut stmt = conn.prepare("SELECT id FROM chunks")?; let iter = stmt.query_map(params![], row_to_id)?; let mut ids = vec![]; for x in iter { @@ -270,14 +217,7 @@ mod sql { fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> { let hash: String = row.get("label")?; let sha256 = Checksum::sha256_from_str_unchecked(&hash); - let generation: i32 = row.get("generation")?; - let meta = if generation == 0 { - ChunkMeta::new(&sha256) - } else { - let ended: String = row.get("ended")?; - ChunkMeta::new_generation(&sha256, &ended) - }; - Ok(meta) + Ok(ChunkMeta::new(&sha256)) } fn row_to_id(row: &Row) -> rusqlite::Result<ChunkId> { diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 46f9e72..15b5a22 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -68,11 +68,6 @@ impl IndexedStore { Ok(self.index.find_by_label(label)?) } - /// Find all generations. - pub fn find_generations(&self) -> Result<Vec<ChunkId>, IndexedError> { - Ok(self.index.find_generations()?) - } - /// Remove a chunk from the store. 
pub fn remove(&mut self, id: &ChunkId) -> Result<(), IndexedError> { self.index.remove_meta(id)?; -- cgit v1.2.1