summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-01-03 11:59:16 +0000
committerLars Wirzenius <liw@liw.fi>2021-01-03 11:59:16 +0000
commitf2dcfc89cb5d8d22c8bdb5f1decdbdbf283d5056 (patch)
tree2cfb0b9e4dd69f93413d0e934e1113534d7352dc
parente6a4eb6edf83a68a6f73094126804beb0c569937 (diff)
parent543107a70eeffaa6932c87c02b43b0fd4f8558e5 (diff)
downloadobnam2-f2dcfc89cb5d8d22c8bdb5f1decdbdbf283d5056.tar.gz
Merge branch 'persisten' into 'main'
feat: load chunk metadata into index at startup See merge request larswirzenius/obnam!56
-rw-r--r--obnam.md43
-rw-r--r--src/bin/obnam-server.rs4
-rw-r--r--src/chunkid.rs9
-rw-r--r--src/indexedstore.rs38
-rw-r--r--subplot/server.py3
-rw-r--r--subplot/server.yaml3
6 files changed, 95 insertions, 5 deletions
diff --git a/obnam.md b/obnam.md
index e877e4c..7fa902f 100644
--- a/obnam.md
+++ b/obnam.md
@@ -777,6 +777,49 @@ then HTTP status code is 404
~~~
+## Persistent across restarts
+
+Chunk storage, and the index of chunk metadata for searches, need to
+be persistent across restarts. This scenario verifies that they are.
+
+First, create a chunk.
+
+~~~scenario
+given an installed obnam
+and a running chunk server
+and a file data.dat containing some random data
+when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
+then HTTP status code is 201
+and content-type is application/json
+and the JSON body has a field chunk_id, henceforth ID
+~~~
+
+Then, restart the server.
+
+~~~scenario
+when the chunk server is stopped
+given a running chunk server
+~~~
+
+Can we still find it by its metadata?
+
+~~~scenario
+when I GET /chunks?sha256=abc
+then HTTP status code is 200
+and content-type is application/json
+and the JSON body matches {"<ID>":{"sha256":"abc","generation":null,"ended":null}}
+~~~
+
+Can we still retrieve it by its identifier?
+
+~~~scenario
+when I GET /chunks/<ID>
+then HTTP status code is 200
+and content-type is application/octet-stream
+and chunk-meta is {"sha256":"abc","generation":null,"ended":null}
+and the body matches file data.dat
+~~~
+
# Acceptance criteria for Obnam as a whole
The scenarios in this chapter apply to Obnam as a whole: the client
diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs
index dc8aa82..76d018f 100644
--- a/src/bin/obnam-server.rs
+++ b/src/bin/obnam-server.rs
@@ -36,7 +36,9 @@ async fn main() -> anyhow::Result<()> {
return Err(ConfigError::BadServerAddress.into());
}
- let store = IndexedStore::new(&config.chunks);
+ let mut store = IndexedStore::new(&config.chunks);
+ store.fill_index()?;
+ println!("existing generations: {:?}", store.find_generations());
let store = Arc::new(Mutex::new(store));
let store = warp::any().map(move || Arc::clone(&store));
diff --git a/src/chunkid.rs b/src/chunkid.rs
index 73e0b27..9eec41f 100644
--- a/src/chunkid.rs
+++ b/src/chunkid.rs
@@ -2,6 +2,7 @@ use crate::checksummer::sha256;
use rusqlite::types::ToSqlOutput;
use rusqlite::ToSql;
use serde::{Deserialize, Serialize};
+use std::ffi::OsStr;
use std::fmt;
use std::hash::Hash;
use std::str::FromStr;
@@ -68,6 +69,14 @@ impl From<&String> for ChunkId {
}
}
+/// Build a chunk id from a file name component, such as a file stem.
+///
+/// The conversion is lossy: any part of `s` that is not valid Unicode is
+/// replaced with U+FFFD, so the resulting id is always valid UTF-8.
+impl From<&OsStr> for ChunkId {
+    fn from(s: &OsStr) -> Self {
+        ChunkId {
+            // `into_owned` hands over the `String` directly when the lossy
+            // conversion already allocated one, instead of copying it again.
+            id: s.to_string_lossy().into_owned(),
+        }
+    }
+}
+
impl FromStr for ChunkId {
type Err = ();
diff --git a/src/indexedstore.rs b/src/indexedstore.rs
index 5a41406..3f6235f 100644
--- a/src/indexedstore.rs
+++ b/src/indexedstore.rs
@@ -3,13 +3,15 @@ use crate::chunkid::ChunkId;
use crate::chunkmeta::ChunkMeta;
use crate::index::Index;
use crate::store::{LoadedChunk, Store};
-use std::path::Path;
+use std::path::{Path, PathBuf};
+use walkdir::WalkDir;
/// A store for chunks and their metadata.
///
/// This combines Store and Index into one interface to make it easier
/// to handle the server side storage of chunks.
pub struct IndexedStore {
+ dirname: PathBuf,
store: Store,
index: Index,
}
@@ -18,17 +20,47 @@ impl IndexedStore {
pub fn new(dirname: &Path) -> Self {
let store = Store::new(dirname);
let index = Index::default();
- Self { store, index }
+ Self {
+ dirname: dirname.to_path_buf(),
+ store,
+ index,
+ }
+ }
+
+    /// Populate the in-memory index from the chunk metadata stored on disk.
+    ///
+    /// Walks the store directory recursively. Every entry whose extension is
+    /// `meta` is read and parsed as a JSON-encoded `ChunkMeta`, then indexed
+    /// under the chunk id taken from the entry's file stem. All other entries
+    /// are ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if walking the directory fails, or if a `.meta`
+    /// entry cannot be read or does not parse as `ChunkMeta`.
+    pub fn fill_index(&mut self) -> anyhow::Result<()> {
+        for entry in WalkDir::new(&self.dirname) {
+            let entry = entry?;
+            let path = entry.path();
+
+            // Only metadata files contribute to the index.
+            let is_meta = path.extension().map_or(false, |ext| ext == "meta");
+            if !is_meta {
+                continue;
+            }
+
+            let text = std::fs::read(path)?;
+            let meta: ChunkMeta = serde_json::from_slice(&text)?;
+
+            // The file stem (file name without the `.meta` suffix) is the chunk id.
+            if let Some(stem) = path.file_stem() {
+                let id: ChunkId = stem.into();
+                self.insert_meta(&id, &meta);
+            }
+        }
+        Ok(())
     }
pub fn save(&mut self, meta: &ChunkMeta, chunk: &DataChunk) -> anyhow::Result<ChunkId> {
let id = ChunkId::new();
self.store.save(&id, meta, chunk)?;
+ self.insert_meta(&id, meta);
+ Ok(id)
+ }
+
+ fn insert_meta(&mut self, id: &ChunkId, meta: &ChunkMeta) {
self.index.insert(id.clone(), "sha256", meta.sha256());
if meta.is_generation() {
self.index.insert_generation(id.clone());
}
- Ok(id)
}
pub fn load(&self, id: &ChunkId) -> anyhow::Result<LoadedChunk> {
diff --git a/subplot/server.py b/subplot/server.py
index dca7f10..5cc9d9b 100644
--- a/subplot/server.py
+++ b/subplot/server.py
@@ -24,7 +24,8 @@ def start_chunk_server(ctx):
shutil.copy(os.path.join(srcdir, x), x)
chunks = "chunks"
- os.mkdir(chunks)
+ if not os.path.exists(chunks):
+ os.mkdir(chunks)
port = random.randint(2000, 30000)
ctx["config"] = config = {
diff --git a/subplot/server.yaml b/subplot/server.yaml
index e7a72b2..2cc2b5f 100644
--- a/subplot/server.yaml
+++ b/subplot/server.yaml
@@ -2,6 +2,9 @@
function: start_chunk_server
cleanup: stop_chunk_server
+- when: "the chunk server is stopped"
+ function: stop_chunk_server
+
- when: "I POST (?P<filename>\\S+) to (?P<path>\\S+), with (?P<header>\\S+): (?P<json>.*)"
regex: true
function: post_file