summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-02-06 16:42:44 +0000
committerLars Wirzenius <liw@liw.fi>2021-02-06 16:42:44 +0000
commitaa75f0d5709fb4062900db5ab5b6e2598b6af667 (patch)
tree861aa3b61e16e2a486c61eabf1bf2e27a223679e
parent375d05a8492c391b229b65a8752c4151ffd5bb20 (diff)
parent61aca01941d3bdf324a207f2b53dbf7128169142 (diff)
downloadobnam2-aa75f0d5709fb4062900db5ab5b6e2598b6af667.tar.gz
Merge branch 'chunk-size' into 'main'
Make chunk size configurable Closes #44 See merge request larswirzenius/obnam!88
-rw-r--r--obnam.md27
-rw-r--r--src/backup_run.rs4
-rw-r--r--src/bin/obnam-server.rs2
-rw-r--r--src/bin/obnam.rs8
-rw-r--r--src/client.rs38
-rw-r--r--src/cmd/backup.rs10
-rw-r--r--src/index.rs15
-rw-r--r--src/indexedstore.rs28
-rw-r--r--subplot/data.py15
-rw-r--r--subplot/data.yaml4
-rw-r--r--subplot/server.py21
-rw-r--r--subplot/server.yaml3
12 files changed, 144 insertions, 31 deletions
diff --git a/obnam.md b/obnam.md
index b41a118..34535a5 100644
--- a/obnam.md
+++ b/obnam.md
@@ -771,6 +771,10 @@ The server has the following API for managing chunks:
* `GET /chunks?sha256=xyzzy` &ndash; find chunks on the server whose
metadata indicates their contents has a given SHA256 checksum
* `GET /chunks?generation=true` &ndash; find generation chunks
+* `GET /chunks?data=True` &ndash; find chunks with file data
+ - this is meant for testing only
+ - it excludes generation chunks, and chunks used to store the
+ generation's SQLite file
HTTP status codes are used to indicate if a request succeeded or not,
using the customary meanings.
@@ -858,6 +862,7 @@ when I POST data.dat to /chunks, with chunk-meta: {"sha256":"abc"}
then HTTP status code is 201
and content-type is application/json
and the JSON body has a field chunk_id, henceforth ID
+and server has 1 file chunks
~~~
We must be able to retrieve it.
@@ -1145,6 +1150,28 @@ given a manifest of the directory live restored in rest in rest.yaml
then files live.yaml and rest.yaml match
~~~
+## Set chunk size
+
+This scenario verifies that the user can set the chunk size in the
+configuration file. The chunk size only affects the chunks of live
+data.
+
+~~~scenario
+given an installed obnam
+given a running chunk server
+given a client config based on tiny-chunk-size.yaml
+given a file live/data.dat containing "abc"
+when I run obnam --config tiny-chunk-size.yaml backup
+then server has 3 file chunks
+~~~
+
+~~~{#tiny-chunk-size.yaml .file .yaml .numberLines}
+verify_tls_cert: false
+root: live
+chunk_size: 1
+~~~
+
+
## Backup or not for the right reason
The decision of whether to back up a file or keep the version in the
diff --git a/src/backup_run.rs b/src/backup_run.rs
index 05d5988..fce9a73 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -30,14 +30,14 @@ pub enum BackupError {
pub type BackupResult<T> = Result<T, BackupError>;
impl BackupRun {
- pub fn new(config: &ClientConfig, buffer_size: usize) -> BackupResult<Self> {
+ pub fn new(config: &ClientConfig) -> BackupResult<Self> {
let client = BackupClient::new(config)?;
let policy = BackupPolicy::new();
let progress = BackupProgress::new();
Ok(Self {
client,
policy,
- buffer_size,
+ buffer_size: config.chunk_size,
progress,
})
}
diff --git a/src/bin/obnam-server.rs b/src/bin/obnam-server.rs
index 19f2e99..0e9d4e6 100644
--- a/src/bin/obnam-server.rs
+++ b/src/bin/obnam-server.rs
@@ -188,6 +188,8 @@ pub async fn search_chunks(
}
if key == "generation" && value == "true" {
store.find_generations().expect("SQL lookup failed")
+ } else if key == "data" && value == "true" {
+ store.find_file_chunks().expect("SQL lookup failed")
} else if key == "sha256" {
store.find_by_sha256(value).expect("SQL lookup failed")
} else {
diff --git a/src/bin/obnam.rs b/src/bin/obnam.rs
index 9c5d3f4..8778a73 100644
--- a/src/bin/obnam.rs
+++ b/src/bin/obnam.rs
@@ -6,8 +6,6 @@ use obnam::cmd::{backup, get_chunk, list, list_files, restore, show_config, show
use std::path::{Path, PathBuf};
use structopt::StructOpt;
-const BUFFER_SIZE: usize = 1024 * 1024;
-
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
let config_file = match opt.config {
@@ -15,15 +13,13 @@ fn main() -> anyhow::Result<()> {
Some(ref path) => path.to_path_buf(),
};
let config = ClientConfig::read_config(&config_file)?;
- if let Some(ref log) = config.log {
- setup_logging(&log)?;
- }
+ setup_logging(&config.log)?;
info!("client starts");
debug!("{:?}", opt);
let result = match opt.cmd {
- Command::Backup => backup(&config, BUFFER_SIZE),
+ Command::Backup => backup(&config),
Command::List => list(&config),
Command::ShowGeneration { gen_id } => show_generation(&config, &gen_id),
Command::ListFiles { gen_id } => list_files(&config, &gen_id),
diff --git a/src/client.rs b/src/client.rs
index 7cd6df7..7a4ce21 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -17,16 +17,35 @@ use std::fs::File;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
+const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;
+const DEVNULL: &str = "/dev/null";
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+struct TentativeClientConfig {
+ server_url: String,
+ verify_tls_cert: Option<bool>,
+ chunk_size: Option<usize>,
+ root: PathBuf,
+ log: Option<PathBuf>,
+}
+
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ClientConfig {
pub server_url: String,
pub verify_tls_cert: bool,
+ pub chunk_size: usize,
pub root: PathBuf,
- pub log: Option<PathBuf>,
+ pub log: PathBuf,
}
#[derive(Debug, thiserror::Error)]
pub enum ClientConfigError {
+ #[error("server_url is empty")]
+ ServerUrlIsEmpty,
+
+ #[error("backup root is unset or empty")]
+ NoBackupRoot,
+
#[error("server URL doesn't use https: {0}")]
NotHttps(String),
@@ -43,15 +62,30 @@ impl ClientConfig {
pub fn read_config(filename: &Path) -> ClientConfigResult<Self> {
trace!("read_config: filename={:?}", filename);
let config = std::fs::read_to_string(filename)?;
- let config: ClientConfig = serde_yaml::from_str(&config)?;
+ let tentative: TentativeClientConfig = serde_yaml::from_str(&config)?;
+
+ let config = ClientConfig {
+ server_url: tentative.server_url,
+ root: tentative.root,
+ verify_tls_cert: tentative.verify_tls_cert.or(Some(false)).unwrap(),
+ chunk_size: tentative.chunk_size.or(Some(DEFAULT_CHUNK_SIZE)).unwrap(),
+ log: tentative.log.or(Some(PathBuf::from(DEVNULL))).unwrap(),
+ };
+
config.check()?;
Ok(config)
}
fn check(&self) -> Result<(), ClientConfigError> {
+ if self.server_url.is_empty() {
+ return Err(ClientConfigError::ServerUrlIsEmpty);
+ }
if !self.server_url.starts_with("https://") {
return Err(ClientConfigError::NotHttps(self.server_url.to_string()));
}
+ if self.root.to_string_lossy().is_empty() {
+ return Err(ClientConfigError::NoBackupRoot);
+ }
Ok(())
}
}
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index a43a622..fd1d876 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -7,10 +7,12 @@ use log::info;
use std::time::SystemTime;
use tempfile::NamedTempFile;
-pub fn backup(config: &ClientConfig, buffer_size: usize) -> Result<(), ObnamError> {
+const SQLITE_CHUNK_SIZE: usize = 1024 * 1024;
+
+pub fn backup(config: &ClientConfig) -> Result<(), ObnamError> {
let runtime = SystemTime::now();
- let run = BackupRun::new(config, buffer_size)?;
+ let run = BackupRun::new(config)?;
// Create a named temporary file. We don't meed the open file
// handle, so we discard that.
@@ -52,7 +54,9 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> Result<(), ObnamErro
// Upload the SQLite file, i.e., the named temporary file, which
// still exists, since we persisted it above.
- let gen_id = run.client().upload_generation(&newname, buffer_size)?;
+ let gen_id = run
+ .client()
+ .upload_generation(&newname, SQLITE_CHUNK_SIZE)?;
println!("status: OK");
println!("duration: {}", runtime.elapsed()?.as_secs());
println!("file-count: {}", file_count);
diff --git a/src/index.rs b/src/index.rs
index f7300da..9386e73 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -75,6 +75,10 @@ impl Index {
pub fn find_generations(&self) -> IndexResult<Vec<ChunkId>> {
sql::find_generations(&self.conn)
}
+
+ pub fn all_chunks(&self) -> IndexResult<Vec<ChunkId>> {
+ sql::find_chunk_ids(&self.conn)
+ }
}
#[cfg(test)]
@@ -243,6 +247,17 @@ mod sql {
Ok(ids)
}
+ pub fn find_chunk_ids(conn: &Connection) -> IndexResult<Vec<ChunkId>> {
+ let mut stmt = conn.prepare("SELECT id FROM chunks WHERE generation IS 0")?;
+ let iter = stmt.query_map(params![], |row| row_to_id(row))?;
+ let mut ids = vec![];
+ for x in iter {
+ let x = x?;
+ ids.push(x);
+ }
+ Ok(ids)
+ }
+
fn row_to_meta(row: &Row) -> rusqlite::Result<ChunkMeta> {
let sha256: String = row.get(row.column_index("sha256")?)?;
let generation: i32 = row.get(row.column_index("generation")?)?;
diff --git a/src/indexedstore.rs b/src/indexedstore.rs
index 3f347dd..f2d1831 100644
--- a/src/indexedstore.rs
+++ b/src/indexedstore.rs
@@ -1,8 +1,9 @@
-use crate::chunk::DataChunk;
+use crate::chunk::{DataChunk, GenerationChunk, GenerationChunkError};
use crate::chunkid::ChunkId;
use crate::chunkmeta::ChunkMeta;
use crate::index::{Index, IndexError};
use crate::store::{Store, StoreError};
+use std::collections::HashSet;
use std::path::Path;
/// A store for chunks and their metadata.
@@ -21,6 +22,9 @@ pub enum IndexedError {
#[error(transparent)]
IndexError(#[from] IndexError),
+ #[error(transparent)]
+ GenerationChunkError(#[from] GenerationChunkError),
+
/// An error from Store.
#[error(transparent)]
SqlError(#[from] StoreError),
@@ -64,6 +68,28 @@ impl IndexedStore {
Ok(self.index.find_generations()?)
}
+ pub fn find_file_chunks(&self) -> IndexedResult<Vec<ChunkId>> {
+ let gen_ids = self.find_generations()?;
+
+ let mut sql_chunks: HashSet<ChunkId> = HashSet::new();
+ for id in gen_ids {
+ let gen_chunk = self.store.load(&id)?;
+ let gen = GenerationChunk::from_data_chunk(&gen_chunk)?;
+ for sqlite_chunk_id in gen.chunk_ids() {
+ sql_chunks.insert(sqlite_chunk_id.clone());
+ }
+ }
+
+ let all_chunk_ids = self.index.all_chunks()?;
+ let file_chunks = all_chunk_ids
+ .iter()
+ .filter(|id| !sql_chunks.contains(id))
+ .map(|id| id.clone())
+ .collect();
+
+ Ok(file_chunks)
+ }
+
pub fn remove(&mut self, id: &ChunkId) -> IndexedResult<()> {
self.index.remove_meta(id)?;
self.store.delete(id)?;
diff --git a/subplot/data.py b/subplot/data.py
index 2a54037..f3faf2b 100644
--- a/subplot/data.py
+++ b/subplot/data.py
@@ -5,14 +5,17 @@ import random
import yaml
-def create_file_with_random_data(ctx, filename=None):
- N = 128
- data = "".join(chr(random.randint(0, 255)) for i in range(N)).encode("UTF-8")
+def create_file_with_given_data(ctx, filename=None, data=None):
+ logging.debug(f"creating file {filename} with {data!r}")
dirname = os.path.dirname(filename) or "."
- logging.debug(f"create_file_with_random_data: dirname={dirname}")
os.makedirs(dirname, exist_ok=True)
- with open(filename, "wb") as f:
- f.write(data)
+ open(filename, "wb").write(data.encode("UTF-8"))
+
+
+def create_file_with_random_data(ctx, filename=None):
+ N = 128
+ data = "".join(chr(random.randint(0, 255)) for i in range(N))
+ create_file_with_given_data(ctx, filename=filename, data=data)
def create_nonutf8_filename(ctx, dirname=None):
diff --git a/subplot/data.yaml b/subplot/data.yaml
index 6d384b8..9538daa 100644
--- a/subplot/data.yaml
+++ b/subplot/data.yaml
@@ -1,6 +1,4 @@
-- given: >
- a file (?P<filename>\\S+) containing "(?P<data>.*)"
- regex: true
+- given: a file {filename} containing "{data:text}"
function: create_file_with_given_data
- given: "a file {filename} containing some random data"
diff --git a/subplot/server.py b/subplot/server.py
index 289e181..df594f7 100644
--- a/subplot/server.py
+++ b/subplot/server.py
@@ -5,8 +5,6 @@ import random
import re
import requests
import shutil
-import socket
-import time
import urllib3
import yaml
@@ -35,7 +33,9 @@ def start_chunk_server(ctx):
"address": f"localhost:{port}",
}
- server_binary = os.path.abspath(os.path.join(srcdir, "target", "debug", "obnam-server"))
+ server_binary = os.path.abspath(
+ os.path.join(srcdir, "target", "debug", "obnam-server")
+ )
filename = "config.yaml"
yaml.safe_dump(config, stream=open(filename, "w"))
@@ -44,11 +44,7 @@ def start_chunk_server(ctx):
ctx["server_url"] = f"https://{config['address']}"
daemon_start_on_port(
- ctx,
- name="obnam-server",
- path=server_binary,
- args=filename,
- port=port,
+ ctx, name="obnam-server", path=server_binary, args=filename, port=port
)
@@ -138,6 +134,15 @@ def json_body_matches(ctx, wanted=None):
assert_eq(body.get(key, "not.there"), wanted[key])
+def server_has_n_file_chunks(ctx, n=None):
+ assert_eq = globals()["assert_eq"]
+ n = int(n)
+ url = f"{ctx['server_url']}/chunks?data=true"
+ _request(ctx, requests.get, url)
+ num_chunks = len(ctx["http.json"])
+ assert_eq(n, num_chunks)
+
+
# Make an HTTP request.
def _request(ctx, method, url, headers=None, data=None):
r = method(url, headers=headers, data=data, verify=False)
diff --git a/subplot/server.yaml b/subplot/server.yaml
index 68f8f0c..60f8a44 100644
--- a/subplot/server.yaml
+++ b/subplot/server.yaml
@@ -43,3 +43,6 @@
- then: "the body matches file {filename}"
function: body_matches_file
+
+- then: "server has {n:int} file chunks"
+ function: server_has_n_file_chunks