From 4c94c794ec805cf643826973e4f83826a1231e54 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 21 Mar 2022 12:01:40 +0200 Subject: fix: old typo in doc comment Sponsored-by: author --- src/client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client.rs b/src/client.rs index b58f89c..c4fbfec 100644 --- a/src/client.rs +++ b/src/client.rs @@ -147,7 +147,7 @@ impl BackupClient { Ok(has) } - /// Upload a data chunk to the srver. + /// Upload a data chunk to the server. pub async fn upload_chunk(&self, chunk: DataChunk) -> Result { let enc = self.cipher.encrypt_chunk(&chunk)?; let res = self -- cgit v1.2.1 From 48139725676fcce89a70897546969623f2474693 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 21 Mar 2022 09:12:49 +0200 Subject: feat! store list of generations in a "client trust root" chunk Backups made with this version can't be restored with old clients, and vice versa. Sponsored-by: author --- obnam.md | 7 ++-- src/backup_run.rs | 3 +- src/chunk.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/client.rs | 52 ++++++++++++++++++++++------ src/cmd/backup.rs | 17 +++++++-- src/cmd/gen_info.rs | 9 ++++- src/cmd/inspect.rs | 9 ++++- src/cmd/list.rs | 8 ++++- src/cmd/list_files.rs | 8 ++++- src/cmd/resolve.rs | 8 ++++- src/cmd/restore.rs | 8 ++++- src/cmd/show_gen.rs | 8 ++++- src/error.rs | 5 +++ 13 files changed, 214 insertions(+), 23 deletions(-) diff --git a/obnam.md b/obnam.md index 12ac087..da641a5 100644 --- a/obnam.md +++ b/obnam.md @@ -1538,14 +1538,15 @@ data. The backup uses a chunk size of one byte, and backs up a file with three bytes. This results in three chunks for the file data, plus one for the generation SQLite file (not split into chunks of one byte), -plus a chunk for the generation itself. A total of five chunks. +plus a chunk for the generation itself. Additionally, the "trust root" +chunk exists. A total of six chunks. 
~~~scenario given a working Obnam system given a client config based on tiny-chunk-size.yaml given a file live/data.dat containing "abc" when I run obnam backup -then server has 5 chunks +then server has 6 chunks ~~~ ~~~{#tiny-chunk-size.yaml .file .yaml .numberLines} @@ -1846,10 +1847,10 @@ then exit code is 1 and stdout contains "live/CACHEDIR.TAG" when I run obnam list-files then exit code is 0 +~~~ then file live/CACHEDIR.TAG was backed up because it was new and stdout doesn't contain "live/data1.dat" and stdout doesn't contain "live/data2.dat" -~~~ ### Ignore CACHEDIR.TAGs if `exclude_cache_tag_directories` is disabled diff --git a/src/backup_run.rs b/src/backup_run.rs index b03a7ee..5ceaed3 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -406,7 +406,8 @@ impl<'a> BackupRun<'a> { } } -fn current_timestamp() -> String { +/// Current timestamp as an ISO 8601 string. +pub fn current_timestamp() -> String { let now: DateTime = Local::now(); format!("{}", now.format("%Y-%m-%d %H:%M:%S.%f %z")) } diff --git a/src/chunk.rs b/src/chunk.rs index a37aa57..238bd01 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -102,3 +102,98 @@ impl GenerationChunk { Ok(DataChunk::new(bytes, meta)) } } + +/// A client trust root chunk. +/// +/// This chunk contains all per-client backup information. As long as +/// this chunk can be trusted, everything it links to can also be +/// trusted, thanks to cryptographic signatures. +#[derive(Debug, Serialize, Deserialize)] +pub struct ClientTrust { + client_name: String, + previous_version: Option, + timestamp: String, + backups: Vec, +} + +/// All the errors that may be returned for `ClientTrust` operations. +#[derive(Debug, thiserror::Error)] +pub enum ClientTrustError { + /// Error converting text from UTF8. + #[error(transparent)] + Utf8Error(#[from] std::str::Utf8Error), + + /// Error parsing JSON as chunk metadata. 
+ #[error("failed to parse JSON: {0}")] + JsonParse(serde_json::Error), + + /// Error generating JSON from chunk metadata. + #[error("failed to serialize to JSON: {0}")] + JsonGenerate(serde_json::Error), +} + +impl ClientTrust { + /// Create a new ClientTrust object. + pub fn new( + name: &str, + previous_version: Option, + timestamp: String, + backups: Vec, + ) -> Self { + Self { + client_name: name.to_string(), + previous_version, + timestamp, + backups, + } + } + + /// Return client name. + pub fn client_name(&self) -> &str { + &self.client_name + } + + /// Return id of previous version, if any. + pub fn previous_version(&self) -> Option { + self.previous_version.clone() + } + + /// Return timestamp. + pub fn timestamp(&self) -> &str { + &self.timestamp + } + + /// Return list of all backup generations known. + pub fn backups(&self) -> &[ChunkId] { + &self.backups + } + + /// Append a backup generation to the list. + pub fn append_backup(&mut self, id: &ChunkId) { + self.backups.push(id.clone()); + } + + /// Update for new upload. + /// + /// This needs to happen every time the chunk is updated so that + /// the timestamp gets updated. + pub fn finalize(&mut self, timestamp: String) { + self.timestamp = timestamp; + } + + /// Convert client trust chunk to a data chunk. + pub fn to_data_chunk(&self) -> Result { + let json: String = serde_json::to_string(self).map_err(ClientTrustError::JsonGenerate)?; + let bytes = json.as_bytes().to_vec(); + let checksum = Checksum::sha256_from_str_unchecked("client-trust"); + let meta = ChunkMeta::new_generation(&checksum, ""); + Ok(DataChunk::new(bytes, meta)) + } + + /// Create a new ClientTrust from a data chunk. 
+ pub fn from_data_chunk(chunk: &DataChunk) -> Result { + let data = chunk.data(); + let data = std::str::from_utf8(data)?; + serde_json::from_str(data).map_err(ClientTrustError::JsonParse) + } +} diff --git a/src/client.rs b/src/client.rs index c4fbfec..5b13cb7 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,6 +1,8 @@ //! Client to the Obnam server HTTP API. -use crate::chunk::{DataChunk, GenerationChunk, GenerationChunkError}; +use crate::chunk::{ + ClientTrust, ClientTrustError, DataChunk, GenerationChunk, GenerationChunkError, +}; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use crate::cipher::{CipherEngine, CipherError}; @@ -54,6 +56,10 @@ pub enum ClientError { #[error(transparent)] GenerationChunkError(#[from] GenerationChunkError), + /// An error regarding client trust. + #[error(transparent)] + ClientTrust(#[from] ClientTrustError), + /// An error using a backup's local metadata. #[error(transparent)] LocalGenerationError(#[from] LocalGenerationError), @@ -170,18 +176,44 @@ impl BackupClient { Ok(chunk_id) } - /// List backup generations known by the server. - pub async fn list_generations(&self) -> Result { - let (_, body) = self.get("", &[("generation", "true")]).await?; + /// Get current client trust chunk from repository, if there is one. 
+ pub async fn get_client_trust(&self) -> Result, ClientError> { + let ids = self.find_client_trusts().await?; + let mut latest: Option = None; + for id in ids { + let chunk = self.fetch_chunk(&id).await?; + let new = ClientTrust::from_data_chunk(&chunk)?; + if let Some(t) = &latest { + if new.timestamp() > t.timestamp() { + latest = Some(new); + } + } else { + latest = Some(new); + } + } + Ok(latest) + } + + async fn find_client_trusts(&self) -> Result, ClientError> { + let body = match self.get("", &[("label", "client-trust")]).await { + Ok((_, body)) => body, + Err(err) => return Err(err), + }; - let map: HashMap = - serde_yaml::from_slice(&body).map_err(ClientError::YamlParse)?; - debug!("list_generations: map={:?}", map); - let finished = map + let hits: HashMap = + serde_json::from_slice(&body).map_err(ClientError::JsonParse)?; + let ids = hits.iter().map(|(id, _)| id.into()).collect(); + Ok(ids) + } + + /// List backup generations known by the server. + pub fn list_generations(&self, trust: &ClientTrust) -> GenerationList { + let finished = trust + .backups() .iter() - .map(|(id, meta)| FinishedGeneration::new(id, meta.ended().map_or("", |s| s))) + .map(|id| FinishedGeneration::new(&format!("{}", id), "")) .collect(); - Ok(GenerationList::new(finished)) + GenerationList::new(finished) } /// Fetch a data chunk from the server, given the chunk identifier. diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index 6983de4..db65da0 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -1,6 +1,7 @@ //! The `backup` subcommand. 
-use crate::backup_run::BackupRun; +use crate::backup_run::{current_timestamp, BackupRun}; +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::dbgen::{schema_version, FileId, DEFAULT_SCHEMA_MAJOR}; @@ -37,7 +38,12 @@ impl Backup { eprintln!("backup: schema: {}", schema); let client = BackupClient::new(config)?; - let genlist = client.list_generations().await?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, current_timestamp(), vec![]))) + .unwrap(); + let genlist = client.list_generations(&trust); let temp = tempdir()?; let oldtemp = temp.path().join("old.db"); @@ -64,6 +70,13 @@ impl Backup { } }; + let mut trust = trust; + trust.append_backup(outcome.gen_id.as_chunk_id()); + trust.finalize(current_timestamp()); + let trust = trust.to_data_chunk()?; + let trust_id = client.upload_chunk(trust).await?; + info!("uploaded new client-trust {}", trust_id); + for w in outcome.warnings.iter() { println!("warning: {}", w); } diff --git a/src/cmd/gen_info.rs b/src/cmd/gen_info.rs index 2ce1f64..0aec103 100644 --- a/src/cmd/gen_info.rs +++ b/src/cmd/gen_info.rs @@ -1,5 +1,6 @@ //! The `gen-info` subcommand. +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -28,7 +29,13 @@ impl GenInfo { let client = BackupClient::new(config)?; - let genlist = client.list_generations().await?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); + + let genlist = client.list_generations(&trust); let gen_id = genlist.resolve(&self.gen_ref)?; info!("generation id is {}", gen_id.as_chunk_id()); diff --git a/src/cmd/inspect.rs b/src/cmd/inspect.rs index d5a75c6..02801ae 100644 --- a/src/cmd/inspect.rs +++ b/src/cmd/inspect.rs @@ -1,5 +1,7 @@ //! The `inspect` subcommand. 
+use crate::backup_run::current_timestamp; +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -27,7 +29,12 @@ impl Inspect { async fn run_async(&self, config: &ClientConfig) -> Result<(), ObnamError> { let temp = NamedTempFile::new()?; let client = BackupClient::new(config)?; - let genlist = client.list_generations().await?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, current_timestamp(), vec![]))) + .unwrap(); + let genlist = client.list_generations(&trust); let gen_id = genlist.resolve(&self.gen_id)?; info!("generation id is {}", gen_id.as_chunk_id()); diff --git a/src/cmd/list.rs b/src/cmd/list.rs index f176594..bbb9c91 100644 --- a/src/cmd/list.rs +++ b/src/cmd/list.rs @@ -1,5 +1,6 @@ //! The `list` subcommand. +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -19,8 +20,13 @@ impl List { async fn run_async(&self, config: &ClientConfig) -> Result<(), ObnamError> { let client = BackupClient::new(config)?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); - let generations = client.list_generations().await?; + let generations = client.list_generations(&trust); for finished in generations.iter() { println!("{} {}", finished.id(), finished.ended()); } diff --git a/src/cmd/list_files.rs b/src/cmd/list_files.rs index 9126564..fb4764d 100644 --- a/src/cmd/list_files.rs +++ b/src/cmd/list_files.rs @@ -1,6 +1,7 @@ //! The `list-files` subcommand. 
use crate::backup_reason::Reason; +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -28,8 +29,13 @@ impl ListFiles { let temp = NamedTempFile::new()?; let client = BackupClient::new(config)?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); - let genlist = client.list_generations().await?; + let genlist = client.list_generations(&trust); let gen_id = genlist.resolve(&self.gen_id)?; let gen = client.fetch_generation(&gen_id, temp.path()).await?; diff --git a/src/cmd/resolve.rs b/src/cmd/resolve.rs index 3b9570a..12432cc 100644 --- a/src/cmd/resolve.rs +++ b/src/cmd/resolve.rs @@ -1,5 +1,6 @@ //! The `resolve` subcommand. +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -22,7 +23,12 @@ impl Resolve { async fn run_async(&self, config: &ClientConfig) -> Result<(), ObnamError> { let client = BackupClient::new(config)?; - let generations = client.list_generations().await?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); + let generations = client.list_generations(&trust); match generations.resolve(&self.generation) { Err(err) => { diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs index 43d9679..4a637da 100644 --- a/src/cmd/restore.rs +++ b/src/cmd/restore.rs @@ -1,6 +1,7 @@ //! The `restore` subcommand. use crate::backup_reason::Reason; +use crate::chunk::ClientTrust; use crate::client::{BackupClient, ClientError}; use crate::config::ClientConfig; use crate::db::DatabaseError; @@ -46,8 +47,13 @@ impl Restore { let temp = NamedTempFile::new()?; let client = BackupClient::new(config)?; + let trust = client + .get_client_trust() + .await? 
+ .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); - let genlist = client.list_generations().await?; + let genlist = client.list_generations(&trust); let gen_id = genlist.resolve(&self.gen_id)?; info!("generation id is {}", gen_id.as_chunk_id()); diff --git a/src/cmd/show_gen.rs b/src/cmd/show_gen.rs index 006e0e0..970a165 100644 --- a/src/cmd/show_gen.rs +++ b/src/cmd/show_gen.rs @@ -1,5 +1,6 @@ //! The `show-generation` subcommand. +use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; use crate::error::ObnamError; @@ -27,8 +28,13 @@ impl ShowGeneration { async fn run_async(&self, config: &ClientConfig) -> Result<(), ObnamError> { let temp = NamedTempFile::new()?; let client = BackupClient::new(config)?; + let trust = client + .get_client_trust() + .await? + .or_else(|| Some(ClientTrust::new("FIXME", None, "".to_string(), vec![]))) + .unwrap(); - let genlist = client.list_generations().await?; + let genlist = client.list_generations(&trust); let gen_id = genlist.resolve(&self.gen_id)?; let gen = client.fetch_generation(&gen_id, temp.path()).await?; let mut files = gen.files()?; diff --git a/src/error.rs b/src/error.rs index 7812081..9c9b432 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,7 @@ //! Errors from Obnam client. use crate::backup_run::BackupError; +use crate::chunk::ClientTrustError; use crate::cipher::CipherError; use crate::client::ClientError; use crate::cmd::restore::RestoreError; @@ -25,6 +26,10 @@ pub enum ObnamError { #[error(transparent)] GenerationListError(#[from] GenerationListError), + /// Error about client trust chunks. + #[error(transparent)] + ClientTrust(#[from] ClientTrustError), + /// Error saving passwords. 
#[error("couldn't save passwords to {0}: {1}")] PasswordSave(PathBuf, PasswordError), -- cgit v1.2.1 From 7e49a52cee01a26b91253abb38dcc40b5ae727e2 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 21 Mar 2022 17:15:51 +0200 Subject: feat! drop generation related fields from chunk metadata Sponsored-by: author --- obnam.md | 8 +++--- src/backup_run.rs | 2 +- src/bin/obnam-server.rs | 4 +-- src/chunk.rs | 6 ++--- src/chunkmeta.rs | 55 +++++-------------------------------- src/index.rs | 72 +++++-------------------------------------------- src/indexedstore.rs | 5 ---- 7 files changed, 21 insertions(+), 131 deletions(-) diff --git a/obnam.md b/obnam.md index da641a5..68aa01e 100644 --- a/obnam.md +++ b/obnam.md @@ -1112,7 +1112,7 @@ We must be able to retrieve it. when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc"} and the body matches file data.dat ~~~ @@ -1122,7 +1122,7 @@ We must also be able to find it based on metadata. when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc"}} ~~~ Finally, we must be able to delete it. After that, we must not be able @@ -1204,7 +1204,7 @@ Can we still find it by its metadata? when I GET /chunks?label=abc then HTTP status code is 200 and content-type is application/json -and the JSON body matches {"":{"label":"abc","generation":null,"ended":null}} +and the JSON body matches {"":{"label":"abc"}} ~~~ Can we still retrieve it by its identifier? @@ -1213,7 +1213,7 @@ Can we still retrieve it by its identifier? 
when I GET /chunks/ then HTTP status code is 200 and content-type is application/octet-stream -and chunk-meta is {"label":"abc","generation":null,"ended":null} +and chunk-meta is {"label":"abc"} and the body matches file data.dat ~~~ diff --git a/src/backup_run.rs b/src/backup_run.rs index 5ceaed3..21140ba 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -356,7 +356,7 @@ impl<'a> BackupRun<'a> { info!("upload SQLite {}", filename.display()); let ids = self.upload_regular_file(filename, size).await?; let gen = GenerationChunk::new(ids); - let data = gen.to_data_chunk(&current_timestamp())?; + let data = gen.to_data_chunk()?; let gen_id = self.client.upload_chunk(data).await?; info!("uploaded generation {}", gen_id); Ok(gen_id) diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index 0b80854..5be2cee 100644 --- a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -153,9 +153,7 @@ pub async fn search_chunks( error!("search has more than one key to search for"); return Ok(ChunkResult::BadRequest); } - if key == "generation" && value == "true" { - store.find_generations().expect("SQL lookup failed") - } else if key == "label" { + if key == "label" { store.find_by_label(value).expect("SQL lookup failed") } else { error!("unknown search key {:?}", key); diff --git a/src/chunk.rs b/src/chunk.rs index 238bd01..27a3ab9 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -93,12 +93,12 @@ impl GenerationChunk { } /// Convert generation chunk to a data chunk. 
- pub fn to_data_chunk(&self, ended: &str) -> Result { + pub fn to_data_chunk(&self) -> Result { let json: String = serde_json::to_string(self).map_err(GenerationChunkError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); let checksum = Checksum::sha256(&bytes); - let meta = ChunkMeta::new_generation(&checksum, ended); + let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } } @@ -186,7 +186,7 @@ impl ClientTrust { let json: String = serde_json::to_string(self).map_err(ClientTrustError::JsonGenerate)?; let bytes = json.as_bytes().to_vec(); let checksum = Checksum::sha256_from_str_unchecked("client-trust"); - let meta = ChunkMeta::new_generation(&checksum, ""); + let meta = ChunkMeta::new(&checksum); Ok(DataChunk::new(bytes, meta)) } diff --git a/src/chunkmeta.rs b/src/chunkmeta.rs index 9a435fe..33c1070 100644 --- a/src/chunkmeta.rs +++ b/src/chunkmeta.rs @@ -7,26 +7,15 @@ use std::str::FromStr; /// Metadata about chunks. /// -/// We manage three bits of metadata about chunks, in addition to its -/// identifier: -/// -/// * for all chunks, a [SHA256][] checksum of the chunk content; we -/// expose this to the server as the chunk "label" -/// -/// * for generation chunks, an indication that it is a generation -/// chunk, and a timestamp for when making the generation snapshot -/// ended -/// -/// There is no syntax or semantics imposed on the timestamp, but a -/// client should probably use [ISO 8601][] representation. +/// We manage a single piece of metadata about chunks, in addition to its +/// identifier: a label assigned by the client. Currently, this is a +/// [SHA256][] checksum of the chunk content. 
/// /// For HTTP, the metadata will be serialised as a JSON object, like this: /// /// ~~~json /// { /// "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", -/// "generation": true, -/// "ended": "2020-09-17T08:17:13+03:00" /// } /// ~~~ /// @@ -42,10 +31,6 @@ use std::str::FromStr; #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct ChunkMeta { label: String, - // The remaining fields are Options so that JSON parsing doesn't - // insist on them being there in the textual representation. - generation: Option, - ended: Option, } impl ChunkMeta { @@ -55,30 +40,9 @@ impl ChunkMeta { pub fn new(checksum: &Checksum) -> Self { ChunkMeta { label: checksum.to_string(), - generation: None, - ended: None, - } - } - - /// Create a new generation chunk. - pub fn new_generation(checksum: &Checksum, ended: &str) -> Self { - ChunkMeta { - label: checksum.to_string(), - generation: Some(true), - ended: Some(ended.to_string()), } } - /// Is this a generation chunk? - pub fn is_generation(&self) -> bool { - matches!(self.generation, Some(true)) - } - - /// When did this generation end? - pub fn ended(&self) -> Option<&str> { - self.ended.as_deref() - } - /// The label of the content of the chunk. /// /// The caller should not interpret the label in any way. 
It @@ -121,25 +85,19 @@ mod test { fn new_creates_data_chunk() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); let meta = ChunkMeta::new(&sum); - assert!(!meta.is_generation()); - assert_eq!(meta.ended(), None); assert_eq!(meta.label(), "abcdef"); } #[test] fn new_generation_creates_generation_chunk() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); - let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); - assert!(meta.is_generation()); - assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); + let meta = ChunkMeta::new(&sum); assert_eq!(meta.label(), "abcdef"); } #[test] fn data_chunk_from_json() { let meta: ChunkMeta = r#"{"label": "abcdef"}"#.parse().unwrap(); - assert!(!meta.is_generation()); - assert_eq!(meta.ended(), None); assert_eq!(meta.label(), "abcdef"); } @@ -149,15 +107,14 @@ mod test { r#"{"label": "abcdef", "generation": true, "ended": "2020-09-17T08:17:13+03:00"}"# .parse() .unwrap(); - assert!(meta.is_generation()); - assert_eq!(meta.ended(), Some("2020-09-17T08:17:13+03:00")); + assert_eq!(meta.label(), "abcdef"); } #[test] fn generation_json_roundtrip() { let sum = Checksum::sha256_from_str_unchecked("abcdef"); - let meta = ChunkMeta::new_generation(&sum, "2020-09-17T08:17:13+03:00"); + let meta = ChunkMeta::new(&sum); let json = serde_json::to_string(&meta).unwrap(); let meta2 = serde_json::from_str(&json).unwrap(); assert_eq!(meta, meta2); diff --git a/src/index.rs b/src/index.rs index a3d95fc..11f3480 100644 --- a/src/index.rs +++ b/src/index.rs @@ -9,7 +9,7 @@ use std::path::Path; /// A chunk index stored on the disk. /// /// A chunk index lets the server quickly find chunks based on a -/// string key/value pair, or whether they are generations. +/// string key/value pair. #[derive(Debug)] pub struct Index { conn: Connection, @@ -66,11 +66,6 @@ impl Index { sql::find_by_label(&self.conn, label) } - /// Find all backup generations. 
- pub fn find_generations(&self) -> Result, IndexError> { - sql::find_generations(&self.conn) - } - /// Find all chunks. pub fn all_chunks(&self) -> Result, IndexError> { sql::find_chunk_ids(&self.conn) @@ -125,36 +120,6 @@ mod test { let ids: Vec = idx.find_by_label("abc").unwrap(); assert_eq!(ids, vec![]); } - - #[test] - fn has_no_generations_initially() { - let dir = tempdir().unwrap(); - let idx = new_index(dir.path()); - assert_eq!(idx.find_generations().unwrap(), vec![]); - } - - #[test] - fn remembers_generation() { - let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); - let meta = ChunkMeta::new_generation(&sum, "timestamp"); - let dir = tempdir().unwrap(); - let mut idx = new_index(dir.path()); - idx.insert_meta(id.clone(), meta).unwrap(); - assert_eq!(idx.find_generations().unwrap(), vec![id]); - } - - #[test] - fn removes_generation() { - let id: ChunkId = "id001".parse().unwrap(); - let sum = Checksum::sha256_from_str_unchecked("abc"); - let meta = ChunkMeta::new_generation(&sum, "timestamp"); - let dir = tempdir().unwrap(); - let mut idx = new_index(dir.path()); - idx.insert_meta(id.clone(), meta).unwrap(); - idx.remove_meta(&id).unwrap(); - assert_eq!(idx.find_generations().unwrap(), vec![]); - } } mod sql { @@ -170,14 +135,10 @@ mod sql { let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE; let conn = Connection::open_with_flags(filename, flags)?; conn.execute( - "CREATE TABLE chunks (id TEXT PRIMARY KEY, label TEXT, generation INT, ended TEXT)", + "CREATE TABLE chunks (id TEXT PRIMARY KEY, label TEXT)", params![], )?; conn.execute("CREATE INDEX label_idx ON chunks (label)", params![])?; - conn.execute( - "CREATE INDEX generation_idx ON chunks (generation)", - params![], - )?; conn.pragma_update(None, "journal_mode", &"WAL")?; Ok(conn) } @@ -194,11 +155,9 @@ mod sql { pub fn insert(t: &Transaction, chunkid: &ChunkId, meta: &ChunkMeta) -> Result<(), IndexError> { let chunkid = 
format!("{}", chunkid); let label = meta.label(); - let generation = if meta.is_generation() { 1 } else { 0 }; - let ended = meta.ended(); t.execute( - "INSERT INTO chunks (id, label, generation, ended) VALUES (?1, ?2, ?3, ?4)", - params![chunkid, label, generation, ended], + "INSERT INTO chunks (id, label) VALUES (?1, ?2)", + params![chunkid, label], )?; Ok(()) } @@ -243,21 +202,9 @@ mod sql { Ok(ids) } - /// Find all generations. - pub fn find_generations(conn: &Connection) -> Result, IndexError> { - let mut stmt = conn.prepare("SELECT id FROM chunks WHERE generation IS 1")?; - let iter = stmt.query_map(params![], row_to_id)?; - let mut ids = vec![]; - for x in iter { - let x = x?; - ids.push(x); - } - Ok(ids) - } - /// Find ids of all chunks. pub fn find_chunk_ids(conn: &Connection) -> Result, IndexError> { - let mut stmt = conn.prepare("SELECT id FROM chunks WHERE generation IS 0")?; + let mut stmt = conn.prepare("SELECT id FROM chunks")?; let iter = stmt.query_map(params![], row_to_id)?; let mut ids = vec![]; for x in iter { @@ -270,14 +217,7 @@ mod sql { fn row_to_meta(row: &Row) -> rusqlite::Result { let hash: String = row.get("label")?; let sha256 = Checksum::sha256_from_str_unchecked(&hash); - let generation: i32 = row.get("generation")?; - let meta = if generation == 0 { - ChunkMeta::new(&sha256) - } else { - let ended: String = row.get("ended")?; - ChunkMeta::new_generation(&sha256, &ended) - }; - Ok(meta) + Ok(ChunkMeta::new(&sha256)) } fn row_to_id(row: &Row) -> rusqlite::Result { diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 46f9e72..15b5a22 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -68,11 +68,6 @@ impl IndexedStore { Ok(self.index.find_by_label(label)?) } - /// Find all generations. - pub fn find_generations(&self) -> Result, IndexedError> { - Ok(self.index.find_generations()?) - } - /// Remove a chunk from the store. 
pub fn remove(&mut self, id: &ChunkId) -> Result<(), IndexedError> { self.index.remove_meta(id)?; -- cgit v1.2.1 From d692b1877a18e83e23cfbcad1b5e9f91f561e7c1 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 21 Mar 2022 17:27:17 +0200 Subject: docs: update obnam.md about client trust root chunk Sponsored-by: author --- obnam.md | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/obnam.md b/obnam.md index 68aa01e..a06717a 100644 --- a/obnam.md +++ b/obnam.md @@ -250,7 +250,7 @@ requirements and notes how they affect the architecture. * **Large numbers of live data files:** Storing and accessing lists of and meta data about files needs to done using data structures that are efficient for that. -* **Live data in the terabyte range:** +* **Live data in the terabyte range:** FIXME * **Many clients:** The architecture should enable flexibly managing clients. * **Shared repository:** The server component needs identify and @@ -895,7 +895,7 @@ clients. Chunks consist of arbitrary binary data, a small amount of metadata, and an identifier chosen by the server. 
The chunk metadata is a JSON -object, consisting of the following fields: +object, consisting of the following field (there used to be more): * `label` — the SHA256 checksum of the chunk contents as determined by the client @@ -904,21 +904,22 @@ object, consisting of the following fields: - note that the server doesn't verify this in any way, to pave way for future client-side encryption of the chunk data, including the label -* `generation` — set to `true` if the chunk represents a - generation - - may also be set to `false` or `null` or be missing entirely - - the server allows for listing chunks where this field is set to - `true` -* `ended` — the timestamp of when the backup generation ended - - note that the server doesn't process this in any way, the contents - is entirely up to the client - - may be set to the empty string, `null`, or be missing entirely - - this can't be used in searches + - there is no requirement that only one chunk has any given label When creating or retrieving a chunk, its metadata is carried in a `Chunk-Meta` header as a JSON object, serialized into a textual form that can be put into HTTP headers. +There are several kinds of chunk. The kind only matters to the client, +not to the server. + +* Data chunk: File content data, from live data files, or from an + SQLite database file listing all files in a backup. +* Generation chunk: A list of chunks for the SQLite file for a + generation. +* Client trust: A list of ids of generation chunks, plus other data + that are per-client, not per-backup. 
+ ## Server @@ -929,12 +930,7 @@ The server has the following API for managing chunks: * `GET /chunks/` — retrieve a chunk (and its metadata) from the server, given a chunk identifier * `GET /chunks?label=xyzzy` — find chunks on the server whose - metadata indicates their contents has a given SHA256 checksum -* `GET /chunks?generation=true` — find generation chunks -* `GET /chunks?data=True` — find chunks with file data - - this is meant for testing only - - it excludes generation chunks, and chunks used to store the - generation's SQLite file + metadata has a specific value for a label. HTTP status codes are used to indicate if a request succeeded or not, using the customary meanings. @@ -958,17 +954,14 @@ and should treat it as an opaque value. When a chunk is retrieved, the chunk metadata is returned in the `Chunk-Meta` header, and the contents in the response body. -It is not possible to update a chunk or its metadata. - -When searching for chunks, any matching chunk's identifiers and -metadata are returned in a JSON object: +It is not possible to update a chunk or its metadata. It's not +possible to remove a chunk. When searching for chunks, any matching +chunk's identifiers and metadata are returned in a JSON object: ~~~json { "fe20734b-edb3-432f-83c3-d35fe15969dd": { - "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", - "generation": null, - "ended: null, + "label": "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b" } } ~~~ -- cgit v1.2.1