From 543107a70eeffaa6932c87c02b43b0fd4f8558e5 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 3 Jan 2021 13:09:35 +0200 Subject: feat: load chunk metadata into index at startup This needs to be replaced with a database or something, but it'll do for now. --- obnam.md | 43 +++++++++++++++++++++++++++++++++++++++++++ src/bin/obnam-server.rs | 4 +++- src/chunkid.rs | 9 +++++++++ src/indexedstore.rs | 38 +++++++++++++++++++++++++++++++++++--- subplot/server.py | 3 ++- subplot/server.yaml | 3 +++ 6 files changed, 95 insertions(+), 5 deletions(-) diff --git a/obnam.md b/obnam.md index e877e4c..7fa902f 100644 --- a/obnam.md +++ b/obnam.md @@ -777,6 +777,49 @@ then HTTP status code is 404 ~~~ +## Persistent across restarts + +Chunk storage, and the index of chunk metadata for searches, needs to +be persistent across restarts. This scenario verifies it is so. + +First, create a chunk. + +~~~scenario +given an installed obnam +and a running chunk server +and a file data.dat containing some random data +when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"} +then HTTP status code is 201 +and content-type is application/json +and the JSON body has a field chunk_id, henceforth ID +~~~ + +Then, restart the server. + +~~~scenario +when the chunk server is stopped +given a running chunk server +~~~ + +Can we still find it by its metadata? + +~~~scenario +when I GET /chunks?sha256=abc +then HTTP status code is 200 +and content-type is application/json +and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}} +~~~ + +Can we still retrieve it by its identifier? 
+ +~~~scenario +when I GET /chunks/<ID> +then HTTP status code is 200 +and content-type is application/octet-stream +and chunk-meta is {"sha256":"abc","generation":null,"ended":null} +and the body matches file data.dat +~~~ + # Acceptance criteria for Obnam as a whole The scenarios in this chapter apply to Obnam as a whole: the client diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs index dc8aa82..76d018f 100644 --- a/src/bin/obnam-server.rs +++ b/src/bin/obnam-server.rs @@ -36,7 +36,9 @@ async fn main() -> anyhow::Result<()> { return Err(ConfigError::BadServerAddress.into()); } - let store = IndexedStore::new(&config.chunks); + let mut store = IndexedStore::new(&config.chunks); + store.fill_index()?; + println!("existing generations: {:?}", store.find_generations()); let store = Arc::new(Mutex::new(store)); let store = warp::any().map(move || Arc::clone(&store)); diff --git a/src/chunkid.rs b/src/chunkid.rs index 73e0b27..9eec41f 100644 --- a/src/chunkid.rs +++ b/src/chunkid.rs @@ -2,6 +2,7 @@ use crate::checksummer::sha256; use rusqlite::types::ToSqlOutput; use rusqlite::ToSql; use serde::{Deserialize, Serialize}; +use std::ffi::OsStr; use std::fmt; use std::hash::Hash; use std::str::FromStr; @@ -68,6 +69,14 @@ impl From<&String> for ChunkId { } } +impl From<&OsStr> for ChunkId { + fn from(s: &OsStr) -> Self { + ChunkId { + id: s.to_string_lossy().to_string(), + } + } +} + impl FromStr for ChunkId { type Err = (); diff --git a/src/indexedstore.rs b/src/indexedstore.rs index 5a41406..3f6235f 100644 --- a/src/indexedstore.rs +++ b/src/indexedstore.rs @@ -3,13 +3,15 @@ use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; use crate::index::Index; use crate::store::{LoadedChunk, Store}; -use std::path::Path; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; /// A store for chunks and their metadata. /// /// This combines Store and Index into one interface to make it easier /// to handle the server side storage of chunks. 
pub struct IndexedStore { + dirname: PathBuf, store: Store, index: Index, } @@ -18,17 +20,47 @@ impl IndexedStore { pub fn new(dirname: &Path) -> Self { let store = Store::new(dirname); let index = Index::default(); - Self { store, index } + Self { + dirname: dirname.to_path_buf(), + store, + index, + } + } + + pub fn fill_index(&mut self) -> anyhow::Result<()> { + for entry in WalkDir::new(&self.dirname) { + let entry = entry?; + let path = entry.path(); + // println!("found entry: {:?} (ext: {:?})", path, path.extension()); + if let Some(ext) = path.extension() { + if ext == "meta" { + println!("found meta: {:?}", path); + let text = std::fs::read(path)?; + let meta: ChunkMeta = serde_json::from_slice(&text)?; + if let Some(stem) = path.file_stem() { + let id: ChunkId = stem.into(); + println!("id: {:?}", id); + self.insert_meta(&id, &meta); + } + } + } + println!(""); + } + Ok(()) } pub fn save(&mut self, meta: &ChunkMeta, chunk: &DataChunk) -> anyhow::Result<ChunkId> { let id = ChunkId::new(); self.store.save(&id, meta, chunk)?; + self.insert_meta(&id, meta); + Ok(id) + } + + fn insert_meta(&mut self, id: &ChunkId, meta: &ChunkMeta) { self.index.insert(id.clone(), "sha256", meta.sha256()); if meta.is_generation() { self.index.insert_generation(id.clone()); } - Ok(id) } pub fn load(&self, id: &ChunkId) -> anyhow::Result<LoadedChunk> { diff --git a/subplot/server.py b/subplot/server.py index dca7f10..5cc9d9b 100644 --- a/subplot/server.py +++ b/subplot/server.py @@ -24,7 +24,8 @@ def start_chunk_server(ctx): shutil.copy(os.path.join(srcdir, x), x) chunks = "chunks" - os.mkdir(chunks) + if not os.path.exists(chunks): + os.mkdir(chunks) port = random.randint(2000, 30000) ctx["config"] = config = { diff --git a/subplot/server.yaml b/subplot/server.yaml index e7a72b2..2cc2b5f 100644 --- a/subplot/server.yaml +++ b/subplot/server.yaml @@ -2,6 +2,9 @@ function: start_chunk_server cleanup: stop_chunk_server +- when: "the chunk server is stopped" + function: stop_chunk_server + - when: "I 
POST (?P<filename>\\S+) to (?P<path>\\S+), with (?P<header>
\\S+): (?P<json>.*)" regex: true function: post_file -- cgit v1.2.1