author    Lars Wirzenius <liw@liw.fi>  2022-03-07 06:21:45 +0000
committer Lars Wirzenius <liw@liw.fi>  2022-03-07 06:21:45 +0000
commit    f7fc85d8e28315d1d14feefe10825ba5c9984041 (patch)
tree      e309e3a679349ed63a7b5b858f329eefac946677
parent    68a932a43eeb238988beada686c1f175caba4441 (diff)
parent    99d516f46bce4048633b85ff2ce0c2446673208e (diff)
download  obnam2-f7fc85d8e28315d1d14feefe10825ba5c9984041.tar.gz

Merge branch 'liw/db-refactor' into 'main'

speed up backups

See merge request obnam/obnam!215
-rw-r--r--  Cargo.toml                2
-rwxr-xr-x  check                     3
-rw-r--r--  src/backup_progress.rs    6
-rw-r--r--  src/backup_run.rs        14
-rw-r--r--  src/cmd/backup.rs        18
-rw-r--r--  src/cmd/list_files.rs     4
-rw-r--r--  src/cmd/restore.rs       36
-rw-r--r--  src/cmd/show_gen.rs       3
-rw-r--r--  src/db.rs               629
-rw-r--r--  src/dbgen.rs            327
-rw-r--r--  src/error.rs              5
-rw-r--r--  src/fsentry.rs           35
-rw-r--r--  src/fsiter.rs            20
-rw-r--r--  src/generation.rs       371
-rw-r--r--  src/lib.rs                2
-rw-r--r--  src/policy.rs             6
16 files changed, 1099 insertions, 382 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 17535db..3d69988 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
name = "obnam"
version = "0.7.0"
authors = ["Lars Wirzenius <liw@liw.fi>"]
-edition = "2018"
+edition = "2021"
description = "a backup program"
license = "AGPL-3.0-or-later"
homepage = "https://obnam.org/"
diff --git a/check b/check
index 2da9a65..905bc5b 100755
--- a/check
+++ b/check
@@ -42,6 +42,9 @@ require_cmd pdflatex
# daemonize installation location changed from Debian 10 to 11.
require_cmd daemonize || require_cmd /usr/sbin/daemonize
+$hideok cargo --version
+$hideok rustc --version
+
got_cargo_cmd clippy && cargo clippy --all-targets -q
$hideok cargo build --all-targets
got_cargo_cmd fmt && $hideok cargo fmt -- --check
diff --git a/src/backup_progress.rs b/src/backup_progress.rs
index 52430e4..f119210 100644
--- a/src/backup_progress.rs
+++ b/src/backup_progress.rs
@@ -4,6 +4,8 @@ use crate::generation::GenId;
use indicatif::{ProgressBar, ProgressStyle};
use std::path::Path;
+const SHOW_PROGRESS: bool = true;
+
/// A progress bar abstraction specific to backups.
///
/// The progress bar is different for initial and incremental backups,
@@ -15,7 +17,7 @@ pub struct BackupProgress {
impl BackupProgress {
/// Create a progress bar for an initial backup.
pub fn initial() -> Self {
- let progress = if true {
+ let progress = if SHOW_PROGRESS {
ProgressBar::new(0)
} else {
ProgressBar::hidden()
@@ -35,7 +37,7 @@ impl BackupProgress {
/// Create a progress bar for an incremental backup.
pub fn incremental() -> Self {
- let progress = if true {
+ let progress = if SHOW_PROGRESS {
ProgressBar::new(0)
} else {
ProgressBar::hidden()
diff --git a/src/backup_run.rs b/src/backup_run.rs
index 0b816bf..9454625 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -7,6 +7,8 @@ use crate::chunker::{ChunkerError, FileChunks};
use crate::chunkid::ChunkId;
use crate::client::{BackupClient, ClientError};
use crate::config::ClientConfig;
+use crate::db::DatabaseError;
+use crate::dbgen::FileId;
use crate::error::ObnamError;
use crate::fsentry::{FilesystemEntry, FilesystemKind};
use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator};
@@ -49,6 +51,10 @@ pub enum BackupError {
#[error(transparent)]
LocalGenerationError(#[from] LocalGenerationError),
+ /// An error using a Database.
+ #[error(transparent)]
+ Database(#[from] DatabaseError),
+
/// An error splitting data into chunks.
#[error(transparent)]
ChunkerError(#[from] ChunkerError),
@@ -84,7 +90,7 @@ struct OneRootBackupOutcome {
#[derive(Debug)]
pub struct RootsBackupOutcome {
/// The number of backed up files.
- pub files_count: i64,
+ pub files_count: FileId,
/// The errors encountered while backing up files.
pub warnings: Vec<BackupError>,
/// CACHEDIR.TAG files that aren't present in a previous generation.
@@ -126,7 +132,7 @@ impl<'a> BackupRun<'a> {
match genid {
None => {
// Create a new, empty generation.
- NascentGeneration::create(oldname)?;
+ NascentGeneration::create(oldname)?.close()?;
// Open the newly created empty generation.
Ok(LocalGeneration::open(oldname)?)
@@ -190,7 +196,9 @@ impl<'a> BackupRun<'a> {
}
}
}
- new.file_count()
+ let count = new.file_count();
+ new.close()?;
+ count
};
self.finish();
let gen_id = self.upload_nascent_generation(newpath).await?;
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index 92b0f40..dae9811 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -3,13 +3,14 @@
use crate::backup_run::BackupRun;
use crate::client::BackupClient;
use crate::config::ClientConfig;
+use crate::dbgen::FileId;
use crate::error::ObnamError;
use crate::generation::GenId;
use log::info;
use std::time::SystemTime;
use structopt::StructOpt;
-use tempfile::NamedTempFile;
+use tempfile::tempdir;
use tokio::runtime::Runtime;
/// Make a backup.
@@ -29,21 +30,22 @@ impl Backup {
let client = BackupClient::new(config)?;
let genlist = client.list_generations().await?;
- let oldtemp = NamedTempFile::new()?;
- let newtemp = NamedTempFile::new()?;
+ let temp = tempdir()?;
+ let oldtemp = temp.path().join("old.db");
+ let newtemp = temp.path().join("new.db");
let (is_incremental, outcome) = match genlist.resolve("latest") {
Err(_) => {
info!("fresh backup without a previous generation");
let mut run = BackupRun::initial(config, &client)?;
- let old = run.start(None, oldtemp.path()).await?;
- (false, run.backup_roots(config, &old, newtemp.path()).await?)
+ let old = run.start(None, &oldtemp).await?;
+ (false, run.backup_roots(config, &old, &newtemp).await?)
}
Ok(old_id) => {
info!("incremental backup based on {}", old_id);
let mut run = BackupRun::incremental(config, &client)?;
- let old = run.start(Some(&old_id), oldtemp.path()).await?;
- (true, run.backup_roots(config, &old, newtemp.path()).await?)
+ let old = run.start(Some(&old_id), &oldtemp).await?;
+ (true, run.backup_roots(config, &old, &newtemp).await?)
}
};
@@ -76,7 +78,7 @@ impl Backup {
fn report_stats(
runtime: &SystemTime,
- file_count: i64,
+ file_count: FileId,
gen_id: &GenId,
num_warnings: usize,
) -> Result<(), ObnamError> {
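
The switch from two NamedTempFile values to a single tempdir is load-bearing: the new Database::create (see src/db.rs below) refuses to open a path that already exists, and NamedTempFile creates the file on disk, whereas tempdir only creates a directory, so old.db and new.db start out as fresh, nonexistent paths. A minimal sketch of the pattern with the tempfile crate:

    use tempfile::tempdir;

    fn main() -> std::io::Result<()> {
        // The directory and everything in it are removed when `temp` is
        // dropped, so both databases share one cleanup point.
        let temp = tempdir()?;
        let oldtemp = temp.path().join("old.db");
        let newtemp = temp.path().join("new.db");
        assert!(!oldtemp.exists() && !newtemp.exists());
        println!("scratch: {} and {}", oldtemp.display(), newtemp.display());
        Ok(())
    }
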
diff --git a/src/cmd/list_files.rs b/src/cmd/list_files.rs
index 12d34b1..9126564 100644
--- a/src/cmd/list_files.rs
+++ b/src/cmd/list_files.rs
@@ -34,8 +34,8 @@ impl ListFiles {
let gen = client.fetch_generation(&gen_id, temp.path()).await?;
for file in gen.files()?.iter()? {
- let file = file?;
- println!("{}", format_entry(file.entry(), file.reason()));
+ let (_, entry, reason, _) = file?;
+ println!("{}", format_entry(&entry, reason));
}
Ok(())
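
Callers now receive plain (fileno, entry, reason, is_cachedir_tag) tuples from LocalGeneration::files() instead of BackedUpFile values. A hypothetical helper sketching the new iteration pattern, assuming the crate is consumed as the obnam library:

    use obnam::error::ObnamError;
    use obnam::generation::LocalGeneration;
    use std::path::Path;

    fn print_files(dbfile: &Path) -> Result<(), ObnamError> {
        let gen = LocalGeneration::open(dbfile)?;
        for file in gen.files()?.iter()? {
            // Each row is (fileno, entry, reason, is_cachedir_tag).
            let (fileno, entry, reason, _) = file?;
            println!("{} {} ({})", fileno, entry.pathbuf().display(), reason);
        }
        Ok(())
    }
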
diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs
index 7b3d95e..43d9679 100644
--- a/src/cmd/restore.rs
+++ b/src/cmd/restore.rs
@@ -3,6 +3,8 @@
use crate::backup_reason::Reason;
use crate::client::{BackupClient, ClientError};
use crate::config::ClientConfig;
+use crate::db::DatabaseError;
+use crate::dbgen::FileId;
use crate::error::ObnamError;
use crate::fsentry::{FilesystemEntry, FilesystemKind};
use crate::generation::{LocalGeneration, LocalGenerationError};
@@ -53,26 +55,16 @@ impl Restore {
info!("restoring {} files", gen.file_count()?);
let progress = create_progress_bar(gen.file_count()?, true);
for file in gen.files()?.iter()? {
- let file = file?;
- match file.reason() {
+ let (fileno, entry, reason, _) = file?;
+ match reason {
Reason::FileError => (),
- _ => {
- restore_generation(
- &client,
- &gen,
- file.fileno(),
- file.entry(),
- &self.to,
- &progress,
- )
- .await?
- }
+ _ => restore_generation(&client, &gen, fileno, &entry, &self.to, &progress).await?,
}
}
for file in gen.files()?.iter()? {
- let file = file?;
- if file.entry().is_dir() {
- restore_directory_metadata(file.entry(), &self.to)?;
+ let (_, entry, _, _) = file?;
+ if entry.is_dir() {
+ restore_directory_metadata(&entry, &self.to)?;
}
}
progress.finish();
@@ -84,6 +76,10 @@ impl Restore {
/// Possible errors from restoring.
#[derive(Debug, thiserror::Error)]
pub enum RestoreError {
+ /// An error using a Database.
+ #[error(transparent)]
+ Database(#[from] DatabaseError),
+
/// Failed to create a named pipe.
#[error("Could not create named pipe (FIFO) {0}")]
NamedPipeCreationError(PathBuf),
@@ -132,7 +128,7 @@ pub enum RestoreError {
async fn restore_generation(
client: &BackupClient,
gen: &LocalGeneration,
- fileid: i64,
+ fileid: FileId,
entry: &FilesystemEntry,
to: &Path,
progress: &ProgressBar,
@@ -185,7 +181,7 @@ async fn restore_regular(
client: &BackupClient,
gen: &LocalGeneration,
path: &Path,
- fileid: i64,
+ fileid: FileId,
entry: &FilesystemEntry,
) -> Result<(), RestoreError> {
debug!("restoring regular {}", path.display());
@@ -293,9 +289,9 @@ fn path_to_cstring(path: &Path) -> CString {
CString::new(path).unwrap()
}
-fn create_progress_bar(file_count: i64, verbose: bool) -> ProgressBar {
+fn create_progress_bar(file_count: FileId, verbose: bool) -> ProgressBar {
let progress = if verbose {
- ProgressBar::new(file_count as u64)
+ ProgressBar::new(file_count)
} else {
ProgressBar::hidden()
};
diff --git a/src/cmd/show_gen.rs b/src/cmd/show_gen.rs
index 6c8ba19..006e0e0 100644
--- a/src/cmd/show_gen.rs
+++ b/src/cmd/show_gen.rs
@@ -35,8 +35,7 @@ impl ShowGeneration {
let mut files = files.iter()?;
let total_bytes = files.try_fold(0, |acc, file| {
- file.map(|file| {
- let e = file.entry();
+ file.map(|(_, e, _, _)| {
if e.kind() == FilesystemKind::Regular {
acc + e.len()
} else {
diff --git a/src/db.rs b/src/db.rs
new file mode 100644
index 0000000..b4de399
--- /dev/null
+++ b/src/db.rs
@@ -0,0 +1,629 @@
+//! A database abstraction around SQLite for Obnam.
+//!
+//! This abstraction provides the bare minimum that Obnam needs, while
+//! trying to be as performant as possible, especially for inserting
+//! rows. Only data types needed by Obnam are supported.
+//!
+//! Note that this abstraction is entirely synchronous. This is for
+//! simplicity, as SQLite only allows one write at a time.
+
+use crate::fsentry::FilesystemEntry;
+use rusqlite::{params, types::ToSqlOutput, CachedStatement, Connection, OpenFlags, Row, ToSql};
+use std::collections::HashSet;
+use std::convert::TryFrom;
+use std::path::{Path, PathBuf};
+
+/// A database.
+pub struct Database {
+ conn: Connection,
+}
+
+impl Database {
+ /// Create a new database file for an empty database.
+ ///
+ /// The database can be written to.
+ pub fn create<P: AsRef<Path>>(filename: P) -> Result<Self, DatabaseError> {
+ if filename.as_ref().exists() {
+ return Err(DatabaseError::Exists(filename.as_ref().to_path_buf()));
+ }
+ let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE;
+ let conn = Connection::open_with_flags(filename, flags)?;
+ conn.execute("BEGIN", params![])?;
+ Ok(Self { conn })
+ }
+
+ /// Open an existing database file in read only mode.
+ pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, DatabaseError> {
+ let flags = OpenFlags::SQLITE_OPEN_READ_ONLY;
+ let conn = Connection::open_with_flags(filename, flags)?;
+ Ok(Self { conn })
+ }
+
+ /// Close an open database, committing any changes to disk.
+ pub fn close(self) -> Result<(), DatabaseError> {
+ self.conn.execute("COMMIT", params![])?;
+ self.conn
+ .close()
+ .map_err(|(_, err)| DatabaseError::Rusqlite(err))?;
+ Ok(())
+ }
+
+ /// Create a table in the database.
+ pub fn create_table(&self, table: &Table) -> Result<(), DatabaseError> {
+ let sql = sql_statement::create_table(table);
+ self.conn.execute(&sql, params![])?;
+ Ok(())
+ }
+
+ /// Create an index in the database.
+ pub fn create_index(
+ &self,
+ name: &str,
+ table: &Table,
+ column: &str,
+ ) -> Result<(), DatabaseError> {
+ let sql = sql_statement::create_index(name, table, column);
+ self.conn.execute(&sql, params![])?;
+ Ok(())
+ }
+
+ /// Insert a row in a table.
+ pub fn insert(&mut self, table: &Table, values: &[Value]) -> Result<(), DatabaseError> {
+ let mut stmt = self.conn.prepare_cached(table.insert())?;
+ assert!(table.has_columns(values));
+ // The ToSql trait implementation for Obnam values can't ever
+ // fail, so we don't handle the error case in the parameter
+ // creation below.
+ stmt.execute(rusqlite::params_from_iter(values.iter().map(|v| {
+ v.to_sql()
+ .expect("conversion of Obnam value to SQLite value failed unexpectedly")
+ })))?;
+ Ok(())
+ }
+
+ /// Return an iterator for all rows in a table.
+ pub fn all_rows<T>(
+ &self,
+ table: &Table,
+ rowfunc: &'static dyn Fn(&Row) -> Result<T, rusqlite::Error>,
+ ) -> Result<SqlResults<T>, DatabaseError> {
+ let sql = sql_statement::select_all_rows(table);
+ SqlResults::new(
+ &self.conn,
+ &sql,
+ None,
+ Box::new(|stmt, _| {
+ let iter = stmt.query_map(params![], |row| rowfunc(row))?;
+ let iter = iter.map(|x| match x {
+ Ok(t) => Ok(t),
+ Err(e) => Err(DatabaseError::Rusqlite(e)),
+ });
+ Ok(Box::new(iter))
+ }),
+ )
+ }
+
+ /// Return rows that have a given value in a given column.
+ ///
+ /// This is simplistic, but for Obnam, it provides all the SQL
+ /// SELECT ... WHERE that's needed, and there's no point in making
+ /// this more generic than is needed.
+ pub fn some_rows<T>(
+ &self,
+ table: &Table,
+ value: &Value,
+ rowfunc: &'static dyn Fn(&Row) -> Result<T, rusqlite::Error>,
+ ) -> Result<SqlResults<T>, DatabaseError> {
+ assert!(table.has_column(value));
+ let sql = sql_statement::select_some_rows(table, value.name());
+ SqlResults::new(
+ &self.conn,
+ &sql,
+ Some(OwnedValue::from(value)),
+ Box::new(|stmt, value| {
+ let iter = stmt.query_map(params![value], |row| rowfunc(row))?;
+ let iter = iter.map(|x| match x {
+ Ok(t) => Ok(t),
+ Err(e) => Err(DatabaseError::Rusqlite(e)),
+ });
+ Ok(Box::new(iter))
+ }),
+ )
+ }
+}
+
+/// Possible errors from a database.
+#[derive(Debug, thiserror::Error)]
+pub enum DatabaseError {
+ /// An error from the rusqlite crate.
+ #[error(transparent)]
+ Rusqlite(#[from] rusqlite::Error),
+
+ /// The database being created already exists.
+ #[error("Database {0} already exists")]
+ Exists(PathBuf),
+}
+
+// A pointer to a "fallible iterator" over values of type `T`, which is to say it's an iterator
+// over values of type `Result<T, DatabaseError>`. The iterator is only valid for the
+// lifetime 'stmt.
+//
+// The fact that it's a pointer (`Box<dyn ...>`) means we don't care what the actual type of
+// the iterator is, and who produces it.
+type SqlResultsIterator<'stmt, T> = Box<dyn Iterator<Item = Result<T, DatabaseError>> + 'stmt>;
+
+// A pointer to a function which, when called on a prepared SQLite statement, would create
+// a "fallible iterator" over values of type `ItemT`. (See above for an explanation of what
+// a "fallible iterator" is.)
+//
+// The iterator is only valid for the lifetime of the associated SQLite statement; we
+// call this lifetime 'stmt, and use it both on the reference and the returned iterator.
+//
+// Now we're in a pickle: all named lifetimes have to be declared _somewhere_, but we can't add
+// 'stmt to the signature of `CreateIterFn` because then we'll have to specify it when we
+// define the function. Obviously, at that point we won't yet have a `Statement`, and thus we
+// would have no idea what its lifetime is going to be. So we can't put the 'stmt lifetime into
+// the signature of `CreateIterFn`.
+//
+// That's what `for<'stmt>` is for. This is a so-called ["higher-rank trait bound"][hrtb], and
+// it enables us to say that a function is valid for *some* lifetime 'stmt that we pass into it
+// at the call site. It lets Rust continue to track lifetimes even though `CreateIterFn`
+// interferes by "hiding" the 'stmt lifetime from its signature.
+//
+// [hrtb]: https://doc.rust-lang.org/nomicon/hrtb.html
+type CreateIterFn<'conn, ItemT> = Box<
+ dyn for<'stmt> Fn(
+ &'stmt mut CachedStatement<'conn>,
+ &Option<OwnedValue>,
+ ) -> Result<SqlResultsIterator<'stmt, ItemT>, DatabaseError>,
+>;
+
+/// An iterator over rows from a query.
+pub struct SqlResults<'conn, ItemT> {
+ stmt: CachedStatement<'conn>,
+ value: Option<OwnedValue>,
+ create_iter: CreateIterFn<'conn, ItemT>,
+}
+
+impl<'conn, ItemT> SqlResults<'conn, ItemT> {
+ fn new(
+ conn: &'conn Connection,
+ statement: &str,
+ value: Option<OwnedValue>,
+ create_iter: CreateIterFn<'conn, ItemT>,
+ ) -> Result<Self, DatabaseError> {
+ let stmt = conn.prepare_cached(statement)?;
+ Ok(Self {
+ stmt,
+ value,
+ create_iter,
+ })
+ }
+
+ /// Create an iterator over results.
+ pub fn iter(&'_ mut self) -> Result<SqlResultsIterator<'_, ItemT>, DatabaseError> {
+ (self.create_iter)(&mut self.stmt, &self.value)
+ }
+}
+
+/// A description of a table in a database.
+pub struct Table {
+ table: String,
+ columns: Vec<Column>,
+ insert: Option<String>,
+ column_names: HashSet<String>,
+}
+
+impl Table {
+ /// Create a new table description without columns.
+ ///
+ /// The table description is not "built". You must add columns and
+ /// then call the [`build`] method.
+ pub fn new(table: &str) -> Self {
+ Self {
+ table: table.to_string(),
+ columns: vec![],
+ insert: None,
+ column_names: HashSet::new(),
+ }
+ }
+
+ /// Append a column.
+ pub fn column(mut self, column: Column) -> Self {
+ self.column_names.insert(column.name().to_string());
+ self.columns.push(column);
+ self
+ }
+
+ /// Finish building the table description.
+ pub fn build(mut self) -> Self {
+ assert!(self.insert.is_none());
+ self.insert = Some(sql_statement::insert(&self));
+ self
+ }
+
+ fn has_columns(&self, values: &[Value]) -> bool {
+ assert!(self.insert.is_some());
+ for v in values.iter() {
+ if !self.column_names.contains(v.name()) {
+ return false;
+ }
+ }
+ true
+ }
+
+ fn has_column(&self, value: &Value) -> bool {
+ assert!(self.insert.is_some());
+ self.column_names.contains(value.name())
+ }
+
+ fn insert(&self) -> &str {
+ assert!(self.insert.is_some());
+ self.insert.as_ref().unwrap()
+ }
+
+ /// What is the name of the table?
+ pub fn name(&self) -> &str {
+ &self.table
+ }
+
+ /// How many columns does the table have?
+ pub fn num_columns(&self) -> usize {
+ self.columns.len()
+ }
+
+ /// What are the names of the columns in the table?
+ pub fn column_names(&self) -> impl Iterator<Item = &str> {
+ self.columns.iter().map(|c| c.name())
+ }
+
+ /// Return SQL column definitions for the table.
+ pub fn column_definitions(&self) -> String {
+ let mut ret = String::new();
+ for c in self.columns.iter() {
+ if !ret.is_empty() {
+ ret.push(',');
+ }
+ ret.push_str(c.name());
+ ret.push(' ');
+ ret.push_str(c.typename());
+ }
+ ret
+ }
+}
+
+/// A column in a table description.
+pub enum Column {
+ /// An integer primary key.
+ PrimaryKey(String),
+ /// An integer.
+ Int(String),
+ /// A text string.
+ Text(String),
+ /// A binary string.
+ Blob(String),
+ /// A boolean.
+ Bool(String),
+}
+
+impl Column {
+ fn name(&self) -> &str {
+ match self {
+ Self::PrimaryKey(name) => name,
+ Self::Int(name) => name,
+ Self::Text(name) => name,
+ Self::Blob(name) => name,
+ Self::Bool(name) => name,
+ }
+ }
+
+ fn typename(&self) -> &str {
+ match self {
+ Self::PrimaryKey(_) => "INTEGER PRIMARY KEY",
+ Self::Int(_) => "INTEGER",
+ Self::Text(_) => "TEXT",
+ Self::Blob(_) => "BLOB",
+ Self::Bool(_) => "BOOLEAN",
+ }
+ }
+
+ /// Create an integer primary key column.
+ pub fn primary_key(name: &str) -> Self {
+ Self::PrimaryKey(name.to_string())
+ }
+
+ /// Create an integer column.
+ pub fn int(name: &str) -> Self {
+ Self::Int(name.to_string())
+ }
+
+ /// Create a text string column.
+ pub fn text(name: &str) -> Self {
+ Self::Text(name.to_string())
+ }
+
+ /// Create a binary string column.
+ pub fn blob(name: &str) -> Self {
+ Self::Blob(name.to_string())
+ }
+
+ /// Create a boolean column.
+ pub fn bool(name: &str) -> Self {
+ Self::Bool(name.to_string())
+ }
+}
+
+/// A value in a named column.
+#[derive(Debug)]
+pub enum Value<'a> {
+ /// An integer primary key.
+ PrimaryKey(&'a str, u64),
+ /// An integer.
+ Int(&'a str, u64),
+ /// A text string.
+ Text(&'a str, &'a str),
+ /// A binary string.
+ Blob(&'a str, &'a [u8]),
+ /// A boolean.
+ Bool(&'a str, bool),
+}
+
+impl<'a> Value<'a> {
+ /// What column should store this value?
+ pub fn name(&self) -> &str {
+ match self {
+ Self::PrimaryKey(name, _) => name,
+ Self::Int(name, _) => name,
+ Self::Text(name, _) => name,
+ Self::Blob(name, _) => name,
+ Self::Bool(name, _) => name,
+ }
+ }
+
+ /// Create an integer primary key value.
+ pub fn primary_key(name: &'a str, value: u64) -> Self {
+ Self::PrimaryKey(name, value)
+ }
+
+ /// Create an integer value.
+ pub fn int(name: &'a str, value: u64) -> Self {
+ Self::Int(name, value)
+ }
+
+ /// Create a text string value.
+ pub fn text(name: &'a str, value: &'a str) -> Self {
+ Self::Text(name, value)
+ }
+
+ /// Create a binary string value.
+ pub fn blob(name: &'a str, value: &'a [u8]) -> Self {
+ Self::Blob(name, value)
+ }
+
+ /// Create a boolean value.
+ pub fn bool(name: &'a str, value: bool) -> Self {
+ Self::Bool(name, value)
+ }
+}
+
+impl<'a> ToSql for Value<'a> {
+ // The trait defines to_sql to return a Result. However, for our
+ // particular case, to_sql can't ever fail. We only store values
+ // in types for which conversion always succeeds: integer,
+ // boolean, text, and blob. _For us_, the caller need never worry
+ // that the conversion fails, but we can't express that in the
+ // type system.
+ fn to_sql(&self) -> Result<rusqlite::types::ToSqlOutput, rusqlite::Error> {
+ use rusqlite::types::ValueRef;
+ let v = match self {
+ Self::PrimaryKey(_, v) => ValueRef::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Int(_, v) => ValueRef::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Bool(_, v) => ValueRef::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Text(_, v) => ValueRef::Text(v.as_ref()),
+ Self::Blob(_, v) => ValueRef::Blob(v),
+ };
+ Ok(ToSqlOutput::Borrowed(v))
+ }
+}
+
+/// Like a Value, but owns the data.
+pub enum OwnedValue {
+ /// An integer primary key.
+ PrimaryKey(String, u64),
+ /// An integer.
+ Int(String, u64),
+ /// A text string.
+ Text(String, String),
+ /// A binary string.
+ Blob(String, Vec<u8>),
+ /// A boolean.
+ Bool(String, bool),
+}
+
+impl From<&Value<'_>> for OwnedValue {
+ fn from(v: &Value) -> Self {
+ match *v {
+ Value::PrimaryKey(name, v) => Self::PrimaryKey(name.to_string(), v),
+ Value::Int(name, v) => Self::Int(name.to_string(), v),
+ Value::Text(name, v) => Self::Text(name.to_string(), v.to_string()),
+ Value::Blob(name, v) => Self::Blob(name.to_string(), v.to_vec()),
+ Value::Bool(name, v) => Self::Bool(name.to_string(), v),
+ }
+ }
+}
+
+impl ToSql for OwnedValue {
+ fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput> {
+ use rusqlite::types::Value;
+ let v = match self {
+ Self::PrimaryKey(_, v) => Value::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Int(_, v) => Value::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Bool(_, v) => Value::Integer(
+ i64::try_from(*v)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?,
+ ),
+ Self::Text(_, v) => Value::Text(v.to_string()),
+ Self::Blob(_, v) => Value::Blob(v.to_vec()),
+ };
+ Ok(ToSqlOutput::Owned(v))
+ }
+}
+
+impl rusqlite::types::ToSql for FilesystemEntry {
+ fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
+ let json = serde_json::to_string(self)
+ .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?;
+ let json = rusqlite::types::Value::Text(json);
+ Ok(ToSqlOutput::Owned(json))
+ }
+}
+
+mod sql_statement {
+ use super::Table;
+
+ pub fn create_table(table: &Table) -> String {
+ format!(
+ "CREATE TABLE {} ({})",
+ table.name(),
+ table.column_definitions()
+ )
+ }
+
+ pub fn create_index(name: &str, table: &Table, column: &str) -> String {
+ format!("CREATE INDEX {} ON {} ({})", name, table.name(), column,)
+ }
+
+ pub fn insert(table: &Table) -> String {
+ format!(
+ "INSERT INTO {} ({}) VALUES ({})",
+ table.name(),
+ &column_names(table),
+ placeholders(table.column_names().count())
+ )
+ }
+
+ pub fn select_all_rows(table: &Table) -> String {
+ format!("SELECT * FROM {}", table.name())
+ }
+
+ pub fn select_some_rows(table: &Table, column: &str) -> String {
+ format!("SELECT * FROM {} WHERE {} = ?", table.name(), column)
+ }
+
+ fn column_names(table: &Table) -> String {
+ table.column_names().collect::<Vec<&str>>().join(",")
+ }
+
+ fn placeholders(num_columns: usize) -> String {
+ let mut s = String::new();
+ for _ in 0..num_columns {
+ if !s.is_empty() {
+ s.push(',');
+ }
+ s.push('?');
+ }
+ s
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::path::Path;
+ use tempfile::tempdir;
+
+ fn get_bar(row: &rusqlite::Row) -> Result<u64, rusqlite::Error> {
+ row.get("bar")
+ }
+
+ fn table() -> Table {
+ Table::new("foo").column(Column::int("bar")).build()
+ }
+
+ fn create_db(file: &Path) -> Database {
+ let table = table();
+ let db = Database::create(file).unwrap();
+ db.create_table(&table).unwrap();
+ db
+ }
+
+ fn open_db(file: &Path) -> Database {
+ Database::open(file).unwrap()
+ }
+
+ fn insert(db: &mut Database, value: u64) {
+ let table = table();
+ db.insert(&table, &[Value::int("bar", value)]).unwrap();
+ }
+
+ fn values(db: Database) -> Vec<u64> {
+ let table = table();
+ let mut rows = db.all_rows(&table, &get_bar).unwrap();
+ let iter = rows.iter().unwrap();
+ let mut values = vec![];
+ for x in iter {
+ values.push(x.unwrap());
+ }
+ values
+ }
+
+ #[test]
+ fn creates_db() {
+ let tmp = tempdir().unwrap();
+ let filename = tmp.path().join("test.db");
+ let db = Database::create(&filename).unwrap();
+ db.close().unwrap();
+ let _ = Database::open(&filename).unwrap();
+ }
+
+ #[test]
+ fn inserts_row() {
+ let tmp = tempdir().unwrap();
+ let filename = tmp.path().join("test.db");
+ let mut db = create_db(&filename);
+ insert(&mut db, 42);
+ db.close().unwrap();
+
+ let db = open_db(&filename);
+ let values = values(db);
+ assert_eq!(values, vec![42]);
+ }
+
+ #[test]
+ fn inserts_many_rows() {
+ const N: u64 = 1000;
+
+ let tmp = tempdir().unwrap();
+ let filename = tmp.path().join("test.db");
+ let mut db = create_db(&filename);
+ for i in 0..N {
+ insert(&mut db, i);
+ }
+ db.close().unwrap();
+
+ let db = open_db(&filename);
+ let values = values(db);
+ assert_eq!(values.len() as u64, N);
+
+ let mut expected = vec![];
+ for i in 0..N {
+ expected.push(i);
+ }
+ assert_eq!(values, expected);
+ }
+}
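
Taken together with the tests above, the new Database type is used by building a Table description once, inserting Value rows inside the single long-running transaction, and committing with close(). A minimal sketch under the same assumptions as the test module (obnam used as a library, tempfile for scratch space):

    use obnam::db::{Column, Database, Table, Value};
    use tempfile::tempdir;

    fn get_bar(row: &rusqlite::Row) -> Result<u64, rusqlite::Error> {
        row.get("bar")
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Describe the table once; build() precomputes the INSERT statement.
        let table = Table::new("foo").column(Column::int("bar")).build();

        let dir = tempdir()?;
        let filename = dir.path().join("example.db");

        // create() opens one transaction; rows become durable only when
        // close() commits it.
        let mut db = Database::create(&filename)?;
        db.create_table(&table)?;
        db.insert(&table, &[Value::int("bar", 42)])?;
        db.close()?;

        // Reopen read-only and stream the rows back.
        let db = Database::open(&filename)?;
        let mut rows = db.all_rows(&table, &get_bar)?;
        for value in rows.iter()? {
            println!("bar = {}", value?);
        }
        Ok(())
    }
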
diff --git a/src/dbgen.rs b/src/dbgen.rs
new file mode 100644
index 0000000..7e54d7d
--- /dev/null
+++ b/src/dbgen.rs
@@ -0,0 +1,327 @@
+//! Database abstraction for generations.
+
+use crate::backup_reason::Reason;
+use crate::chunkid::ChunkId;
+use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value};
+use crate::fsentry::FilesystemEntry;
+use log::error;
+use std::collections::HashMap;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Path, PathBuf};
+
+/// Current generation database schema major version.
+pub const SCHEMA_MAJOR: u32 = 0;
+
+/// Current generation database schema minor version.
+pub const SCHEMA_MINOR: u32 = 0;
+
+/// An identifier for a file in a generation.
+pub type FileId = u64;
+
+/// Possible errors from using generation databases.
+#[derive(Debug, thiserror::Error)]
+pub enum GenerationDbError {
+ /// Duplicate file names.
+ #[error("Generation has more than one file with the name {0}")]
+ TooManyFiles(PathBuf),
+
+ /// No 'meta' table in generation.
+ #[error("Generation does not have a 'meta' table")]
+ NoMeta,
+
+ /// Missing row in the 'meta' table.
+ #[error("Generation 'meta' table does not have a row {0}")]
+ NoMetaKey(String),
+
+ /// Bad data in 'meta' table.
+ #[error("Generation 'meta' row {0} has badly formed integer: {1}")]
+ BadMetaInteger(String, std::num::ParseIntError),
+
+ /// Local generation uses a schema version that this version of
+ /// Obnam isn't compatible with.
+ #[error("Backup is not compatible with this version of Obnam: {0}.{1}")]
+ Incompatible(u32, u32),
+
+ /// Error from a database
+ #[error(transparent)]
+ Database(#[from] DatabaseError),
+
+ /// Error from JSON.
+ #[error(transparent)]
+ SerdeJsonError(#[from] serde_json::Error),
+
+ /// Error from I/O.
+ #[error(transparent)]
+ IoError(#[from] std::io::Error),
+}
+
+/// A database representing a backup generation.
+pub struct GenerationDb {
+ created: bool,
+ db: Database,
+ meta: Table,
+ files: Table,
+ chunks: Table,
+}
+
+impl GenerationDb {
+ /// Create a new generation database in read/write mode.
+ pub fn create<P: AsRef<Path>>(filename: P) -> Result<Self, GenerationDbError> {
+ let db = Database::create(filename.as_ref())?;
+ let mut moi = Self::new(db);
+ moi.created = true;
+ moi.create_tables()?;
+ Ok(moi)
+ }
+
+ /// Open an existing generation database in read-only mode.
+ pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, GenerationDbError> {
+ let db = Database::open(filename.as_ref())?;
+ Ok(Self::new(db))
+ }
+
+ fn new(db: Database) -> Self {
+ let meta = Table::new("meta")
+ .column(Column::text("key"))
+ .column(Column::text("value"))
+ .build();
+ let files = Table::new("files")
+ .column(Column::primary_key("fileno")) // FIXME: rename to fileid
+ .column(Column::blob("filename"))
+ .column(Column::text("json"))
+ .column(Column::text("reason"))
+ .column(Column::bool("is_cachedir_tag"))
+ .build();
+ let chunks = Table::new("chunks")
+ .column(Column::int("fileno")) // FIXME: rename to fileid
+ .column(Column::text("chunkid"))
+ .build();
+
+ Self {
+ created: false,
+ db,
+ meta,
+ files,
+ chunks,
+ }
+ }
+
+ fn create_tables(&mut self) -> Result<(), GenerationDbError> {
+ self.db.create_table(&self.meta)?;
+ self.db.create_table(&self.files)?;
+ self.db.create_table(&self.chunks)?;
+
+ self.db.insert(
+ &self.meta,
+ &[
+ Value::text("key", "schema_version_major"),
+ Value::text("value", &format!("{}", SCHEMA_MAJOR)),
+ ],
+ )?;
+ self.db.insert(
+ &self.meta,
+ &[
+ Value::text("key", "schema_version_minor"),
+ Value::text("value", &format!("{}", SCHEMA_MINOR)),
+ ],
+ )?;
+
+ Ok(())
+ }
+
+ /// Close a database, commit any changes.
+ pub fn close(self) -> Result<(), GenerationDbError> {
+ if self.created {
+ self.db
+ .create_index("filenames_idx", &self.files, "filename")?;
+ self.db.create_index("fileid_idx", &self.chunks, "fileno")?;
+ }
+ self.db.close().map_err(GenerationDbError::Database)
+ }
+
+ /// Return contents of "meta" table as a HashMap.
+ pub fn meta(&self) -> Result<HashMap<String, String>, GenerationDbError> {
+ let mut map = HashMap::new();
+ let mut iter = self.db.all_rows(&self.meta, &row_to_kv)?;
+ for kv in iter.iter()? {
+ let (key, value) = kv?;
+ map.insert(key, value);
+ }
+ Ok(map)
+ }
+
+ /// Insert a file system entry into the database.
+ pub fn insert(
+ &mut self,
+ e: FilesystemEntry,
+ fileid: FileId,
+ ids: &[ChunkId],
+ reason: Reason,
+ is_cachedir_tag: bool,
+ ) -> Result<(), GenerationDbError> {
+ let json = serde_json::to_string(&e)?;
+ self.db.insert(
+ &self.files,
+ &[
+ Value::primary_key("fileno", fileid),
+ Value::blob("filename", &path_into_blob(&e.pathbuf())),
+ Value::text("json", &json),
+ Value::text("reason", &format!("{}", reason)),
+ Value::bool("is_cachedir_tag", is_cachedir_tag),
+ ],
+ )?;
+ for id in ids {
+ self.db.insert(
+ &self.chunks,
+ &[
+ Value::int("fileno", fileid),
+ Value::text("chunkid", &format!("{}", id)),
+ ],
+ )?;
+ }
+ Ok(())
+ }
+
+ /// Count number of file system entries.
+ pub fn file_count(&self) -> Result<FileId, GenerationDbError> {
+ // FIXME: this needs to be done using "SELECT count(*) FROM
+ // files", but the Database abstraction doesn't support that
+ // yet.
+ let mut iter = self.db.all_rows(&self.files, &row_to_entry)?;
+ let mut count = 0;
+ for _ in iter.iter()? {
+ count += 1;
+ }
+ Ok(count)
+ }
+
+ /// Does a path refer to a cache directory?
+ pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, GenerationDbError> {
+ let filename_vec = path_into_blob(filename);
+ let value = Value::blob("filename", &filename_vec);
+ let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?;
+ let mut iter = rows.iter()?;
+
+ if let Some(row) = iter.next() {
+ // Make sure there's only one row for a given filename. A
+ // bug in a previous version, or a maliciously constructed
+ // generation, could result in there being more than one.
+ if iter.next().is_some() {
+ error!("too many files in file lookup");
+ Err(GenerationDbError::TooManyFiles(filename.to_path_buf()))
+ } else {
+ let (_, _, _, is_cachedir_tag) = row?;
+ Ok(is_cachedir_tag)
+ }
+ } else {
+ Ok(false)
+ }
+ }
+
+ /// Return all chunk ids in database.
+ pub fn chunkids(&self, fileid: FileId) -> Result<SqlResults<ChunkId>, GenerationDbError> {
+ let fileid = Value::int("fileno", fileid);
+ Ok(self.db.some_rows(&self.chunks, &fileid, &row_to_chunkid)?)
+ }
+
+ /// Return all file descriptions in database.
+ pub fn files(
+ &self,
+ ) -> Result<SqlResults<(FileId, FilesystemEntry, Reason, bool)>, GenerationDbError> {
+ Ok(self.db.all_rows(&self.files, &row_to_fsentry)?)
+ }
+
+ /// Get a file's information given its path.
+ pub fn get_file(&self, filename: &Path) -> Result<Option<FilesystemEntry>, GenerationDbError> {
+ match self.get_file_and_fileno(filename)? {
+ None => Ok(None),
+ Some((_, e, _)) => Ok(Some(e)),
+ }
+ }
+
+ /// Get a file's information given its id in the database.
+ pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, GenerationDbError> {
+ match self.get_file_and_fileno(filename)? {
+ None => Ok(None),
+ Some((id, _, _)) => Ok(Some(id)),
+ }
+ }
+
+ fn get_file_and_fileno(
+ &self,
+ filename: &Path,
+ ) -> Result<Option<(FileId, FilesystemEntry, String)>, GenerationDbError> {
+ let filename_bytes = path_into_blob(filename);
+ let value = Value::blob("filename", &filename_bytes);
+ let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?;
+ let mut iter = rows.iter()?;
+
+ if let Some(row) = iter.next() {
+ // Make sure there's only one row for a given filename. A
+ // bug in a previous version, or a maliciously constructed
+ // generation, could result in there being more than one.
+ if iter.next().is_some() {
+ error!("too many files in file lookup");
+ Err(GenerationDbError::TooManyFiles(filename.to_path_buf()))
+ } else {
+ let (fileid, ref json, ref reason, _) = row?;
+ let entry = serde_json::from_str(json)?;
+ Ok(Some((fileid, entry, reason.to_string())))
+ }
+ } else {
+ Ok(None)
+ }
+ }
+}
+
+fn row_to_kv(row: &rusqlite::Row) -> rusqlite::Result<(String, String)> {
+ let k = row.get("key")?;
+ let v = row.get("value")?;
+ Ok((k, v))
+}
+
+fn path_into_blob(path: &Path) -> Vec<u8> {
+ path.as_os_str().as_bytes().to_vec()
+}
+
+fn row_to_entry(row: &rusqlite::Row) -> rusqlite::Result<(FileId, String, String, bool)> {
+ let fileno: FileId = row.get("fileno")?;
+ let json: String = row.get("json")?;
+ let reason: String = row.get("reason")?;
+ let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+ Ok((fileno, json, reason, is_cachedir_tag))
+}
+
+fn row_to_fsentry(
+ row: &rusqlite::Row,
+) -> rusqlite::Result<(FileId, FilesystemEntry, Reason, bool)> {
+ let fileno: FileId = row.get("fileno")?;
+ let json: String = row.get("json")?;
+ let entry = serde_json::from_str(&json).map_err(|err| {
+ rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(err))
+ })?;
+ let reason: String = row.get("reason")?;
+ let reason = Reason::from(&reason);
+ let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+ Ok((fileno, entry, reason, is_cachedir_tag))
+}
+
+fn row_to_chunkid(row: &rusqlite::Row) -> rusqlite::Result<ChunkId> {
+ let chunkid: String = row.get("chunkid")?;
+ let chunkid = ChunkId::recreate(&chunkid);
+ Ok(chunkid)
+}
+
+#[cfg(test)]
+mod test {
+ use super::Database;
+ use tempfile::tempdir;
+
+ #[test]
+ fn opens_previously_created_db() {
+ let dir = tempdir().unwrap();
+ let filename = dir.path().join("test.db");
+ Database::create(&filename).unwrap();
+ assert!(Database::open(&filename).is_ok());
+ }
+}
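
GenerationDb layers the generation schema (meta, files, chunks) on top of Database. A minimal sketch of the write-then-read flow, modeled on the tests in src/generation.rs (the entry for "." is just an arbitrary example):

    use obnam::backup_reason::Reason;
    use obnam::dbgen::GenerationDb;
    use obnam::fsentry::FilesystemEntry;
    use std::path::Path;
    use tempfile::tempdir;
    use users::UsersCache;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let dir = tempdir()?;
        let db_path = dir.path().join("gen.db");

        // One UsersCache serves all entries, so each uid/gid is resolved
        // against the system user database at most once.
        let mut cache = UsersCache::new();
        let meta = std::fs::symlink_metadata(".")?;
        let entry = FilesystemEntry::from_metadata(Path::new("."), &meta, &mut cache)?;

        let mut gen = GenerationDb::create(&db_path)?;
        gen.insert(entry, 1, &[], Reason::IsNew, false)?;
        // Indexes are only created, and the transaction committed, on close().
        gen.close()?;

        let gen = GenerationDb::open(&db_path)?;
        assert_eq!(gen.file_count()?, 1);
        Ok(())
    }
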
diff --git a/src/error.rs b/src/error.rs
index e8f5ee8..cf18c83 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -5,6 +5,7 @@ use crate::cipher::CipherError;
use crate::client::ClientError;
use crate::cmd::restore::RestoreError;
use crate::config::ClientConfigError;
+use crate::db::DatabaseError;
use crate::generation::{LocalGenerationError, NascentError};
use crate::genlist::GenerationListError;
use crate::passwords::PasswordError;
@@ -51,6 +52,10 @@ pub enum ObnamError {
#[error(transparent)]
LocalGenerationError(#[from] LocalGenerationError),
+ /// Error using a Database.
+ #[error(transparent)]
+ Database(#[from] DatabaseError),
+
/// Error restoring a backup.
#[error(transparent)]
RestoreError(#[from] RestoreError),
diff --git a/src/fsentry.rs b/src/fsentry.rs
index 9f40fd0..90afd70 100644
--- a/src/fsentry.rs
+++ b/src/fsentry.rs
@@ -8,6 +8,7 @@ use std::fs::{FileType, Metadata};
use std::os::unix::ffi::OsStringExt;
use std::os::unix::fs::FileTypeExt;
use std::path::{Path, PathBuf};
+use users::{Groups, Users, UsersCache};
#[cfg(target_os = "linux")]
use std::os::linux::fs::MetadataExt;
@@ -69,7 +70,11 @@ pub enum FsEntryError {
#[allow(clippy::len_without_is_empty)]
impl FilesystemEntry {
/// Create an `FsEntry` from a file's metadata.
- pub fn from_metadata(path: &Path, meta: &Metadata) -> Result<Self, FsEntryError> {
+ pub fn from_metadata(
+ path: &Path,
+ meta: &Metadata,
+ cache: &mut UsersCache,
+ ) -> Result<Self, FsEntryError> {
let kind = FilesystemKind::from_file_type(meta.file_type());
let symlink_target = if kind == FilesystemKind::Symlink {
debug!("reading symlink target for {:?}", path);
@@ -82,6 +87,16 @@ impl FilesystemEntry {
let uid = meta.st_uid();
let gid = meta.st_gid();
+ let user: String = if let Some(user) = cache.get_user_by_uid(uid) {
+ user.name().to_string_lossy().to_string()
+ } else {
+ "".to_string()
+ };
+ let group = if let Some(group) = cache.get_group_by_gid(gid) {
+ group.name().to_string_lossy().to_string()
+ } else {
+ "".to_string()
+ };
Ok(Self {
path: path.to_path_buf().into_os_string().into_vec(),
@@ -95,8 +110,8 @@ impl FilesystemEntry {
symlink_target,
uid,
gid,
- user: get_username(uid),
- group: get_groupname(gid),
+ user,
+ group,
})
}
@@ -152,20 +167,6 @@ impl FilesystemEntry {
}
}
-fn get_username(uid: u32) -> String {
- match users::get_user_by_uid(uid) {
- None => "".to_string(),
- Some(user) => user.name().to_os_string().to_string_lossy().into_owned(),
- }
-}
-
-fn get_groupname(gid: u32) -> String {
- match users::get_group_by_gid(gid) {
- None => "".to_string(),
- Some(group) => group.name().to_os_string().to_string_lossy().into_owned(),
- }
-}
-
/// Different types of file system entries.
#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)]
pub enum FilesystemKind {
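
This change is where most of the backup speed-up comes from: the removed get_username and get_groupname helpers called users::get_user_by_uid and users::get_group_by_gid for every single file, and each call queries the system user database. A UsersCache, threaded in from the directory iterator, memoizes the answers. A small sketch of the cache API from the users crate:

    use users::{Groups, Users, UsersCache};

    fn main() {
        let cache = UsersCache::new();
        // The first lookup for an id consults the system user database;
        // later lookups for the same id are answered from the cache.
        let user = cache
            .get_user_by_uid(0)
            .map(|u| u.name().to_string_lossy().to_string())
            .unwrap_or_default();
        let group = cache
            .get_group_by_gid(0)
            .map(|g| g.name().to_string_lossy().to_string())
            .unwrap_or_default();
        println!("uid 0 -> {}, gid 0 -> {}", user, group);
    }
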
diff --git a/src/fsiter.rs b/src/fsiter.rs
index 2747cce..ef2886d 100644
--- a/src/fsiter.rs
+++ b/src/fsiter.rs
@@ -1,8 +1,9 @@
//! Iterate over directory tree.
use crate::fsentry::{FilesystemEntry, FsEntryError};
-use log::{debug, warn};
+use log::warn;
use std::path::{Path, PathBuf};
+use users::UsersCache;
use walkdir::{DirEntry, IntoIter, WalkDir};
/// Filesystem entry along with additional info about it.
@@ -56,6 +57,7 @@ impl Iterator for FsIterator {
/// Cachedir-aware adaptor for WalkDir: it skips the contents of dirs that contain CACHEDIR.TAG,
/// but still yields entries for the dir and the tag themselves.
struct SkipCachedirs {
+ cache: UsersCache,
iter: IntoIter,
exclude_cache_tag_directories: bool,
// This is the last tag we've found. `next()` will yield it before asking `iter` for more
@@ -66,6 +68,7 @@ struct SkipCachedirs {
impl SkipCachedirs {
fn new(iter: IntoIter, exclude_cache_tag_directories: bool) -> Self {
Self {
+ cache: UsersCache::new(),
iter,
exclude_cache_tag_directories,
cachedir_tag: None,
@@ -109,7 +112,7 @@ impl SkipCachedirs {
if content == CACHEDIR_TAG {
self.iter.skip_current_dir();
- self.cachedir_tag = Some(new_entry(&tag_path, true));
+ self.cachedir_tag = Some(new_entry(&tag_path, true, &mut self.cache));
}
}
}
@@ -120,22 +123,24 @@ impl Iterator for SkipCachedirs {
fn next(&mut self) -> Option<Self::Item> {
self.cachedir_tag.take().or_else(|| {
let next = self.iter.next();
- debug!("walkdir found: {:?}", next);
match next {
None => None,
Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))),
Some(Ok(entry)) => {
self.try_enqueue_cachedir_tag(&entry);
- Some(new_entry(entry.path(), false))
+ Some(new_entry(entry.path(), false, &mut self.cache))
}
}
})
}
}
-fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result<AnnotatedFsEntry, FsIterError> {
+fn new_entry(
+ path: &Path,
+ is_cachedir_tag: bool,
+ cache: &mut UsersCache,
+) -> Result<AnnotatedFsEntry, FsIterError> {
let meta = std::fs::symlink_metadata(path);
- debug!("metadata for {:?}: {:?}", path, meta);
let meta = match meta {
Ok(meta) => meta,
Err(err) => {
@@ -143,8 +148,7 @@ fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result<AnnotatedFsEntry, FsI
return Err(FsIterError::Metadata(path.to_path_buf(), err));
}
};
- let entry = FilesystemEntry::from_metadata(path, &meta)?;
- debug!("FileSystemEntry for {:?}: {:?}", path, entry);
+ let entry = FilesystemEntry::from_metadata(path, &meta, cache)?;
let annotated = AnnotatedFsEntry {
inner: entry,
is_cachedir_tag,
diff --git a/src/generation.rs b/src/generation.rs
index 454bb50..05163be 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -2,22 +2,14 @@
use crate::backup_reason::Reason;
use crate::chunkid::ChunkId;
+use crate::db::{DatabaseError, SqlResults};
+use crate::dbgen::{FileId, GenerationDb, GenerationDbError, SCHEMA_MAJOR, SCHEMA_MINOR};
use crate::fsentry::FilesystemEntry;
-use rusqlite::Connection;
use serde::Serialize;
use std::collections::HashMap;
use std::fmt;
use std::path::{Path, PathBuf};
-/// Current generation database schema major version.
-const SCHEMA_MAJOR: u32 = 0;
-
-/// Current generation database schema minor version.
-const SCHEMA_MINOR: u32 = 0;
-
-/// An identifier for a file in a generation.
-type FileId = i64;
-
/// An identifier for a generation.
#[derive(Debug, Clone)]
pub struct GenId {
@@ -51,7 +43,7 @@ impl fmt::Display for GenId {
/// finished yet, and it's not actually on the server until the upload
/// of its generation chunk.
pub struct NascentGeneration {
- conn: Connection,
+ db: GenerationDb,
fileno: FileId,
}
@@ -66,6 +58,10 @@ pub enum NascentError {
#[error(transparent)]
LocalGenerationError(#[from] LocalGenerationError),
+ /// Error from a GenerationDb.
+ #[error(transparent)]
+ GenerationDb(#[from] GenerationDbError),
+
/// Error from an SQL transaction.
#[error("SQL transaction error: {0}")]
Transaction(rusqlite::Error),
@@ -85,8 +81,13 @@ impl NascentGeneration {
where
P: AsRef<Path>,
{
- let conn = sql::create_db(filename.as_ref())?;
- Ok(Self { conn, fileno: 0 })
+ let db = GenerationDb::create(filename.as_ref())?;
+ Ok(Self { db, fileno: 0 })
+ }
+
+ /// Commit any changes, and close the database.
+ pub fn close(self) -> Result<(), NascentError> {
+ self.db.close().map_err(NascentError::GenerationDb)
}
/// How many files are there now in the nascent generation?
@@ -102,10 +103,9 @@ impl NascentGeneration {
reason: Reason,
is_cachedir_tag: bool,
) -> Result<(), NascentError> {
- let t = self.conn.transaction().map_err(NascentError::Transaction)?;
self.fileno += 1;
- sql::insert_one(&t, e, self.fileno, ids, reason, is_cachedir_tag)?;
- t.commit().map_err(NascentError::Commit)?;
+ self.db
+ .insert(e, self.fileno, ids, reason, is_cachedir_tag)?;
Ok(())
}
}
@@ -146,7 +146,7 @@ impl FinishedGeneration {
/// This is for querying an existing generation, and other read-only
/// operations.
pub struct LocalGeneration {
- conn: Connection,
+ db: GenerationDb,
}
/// Possible errors from using local generations.
@@ -177,6 +177,14 @@ pub enum LocalGenerationError {
#[error(transparent)]
RusqliteError(#[from] rusqlite::Error),
+ /// Error from a GenerationDb.
+ #[error(transparent)]
+ GenerationDb(#[from] GenerationDbError),
+
+ /// Error from a Database.
+ #[error(transparent)]
+ Database(#[from] DatabaseError),
+
/// Error from JSON.
#[error(transparent)]
SerdeJsonError(#[from] serde_json::Error),
@@ -195,8 +203,7 @@ pub struct BackedUpFile {
impl BackedUpFile {
/// Create a new `BackedUpFile`.
- pub fn new(fileno: FileId, entry: FilesystemEntry, reason: &str) -> Self {
- let reason = Reason::from(reason);
+ pub fn new(fileno: FileId, entry: FilesystemEntry, reason: Reason) -> Self {
Self {
fileno,
entry,
@@ -221,8 +228,8 @@ impl BackedUpFile {
}
impl LocalGeneration {
- fn new(conn: Connection) -> Self {
- Self { conn }
+ fn new(db: GenerationDb) -> Self {
+ Self { db }
}
/// Open a local file as a local generation.
@@ -230,8 +237,8 @@ impl LocalGeneration {
where
P: AsRef<Path>,
{
- let conn = sql::open_db(filename.as_ref())?;
- let gen = Self::new(conn);
+ let db = GenerationDb::open(filename.as_ref())?;
+ let gen = Self::new(db);
let schema = gen.meta()?.schema_version();
let our_schema = SchemaVersion::new(SCHEMA_MAJOR, SCHEMA_MINOR);
if !our_schema.is_compatible_with(&schema) {
@@ -245,26 +252,27 @@ impl LocalGeneration {
/// Return generation metadata for local generation.
pub fn meta(&self) -> Result<GenMeta, LocalGenerationError> {
- let map = sql::meta(&self.conn)?;
+ let map = self.db.meta()?;
GenMeta::from(map)
}
/// How many files are there in the local generation?
- pub fn file_count(&self) -> Result<i64, LocalGenerationError> {
- sql::file_count(&self.conn)
+ pub fn file_count(&self) -> Result<FileId, LocalGenerationError> {
+ Ok(self.db.file_count()?)
}
/// Return all files in the local generation.
- pub fn files(&self) -> Result<sql::SqlResults<BackedUpFile>, LocalGenerationError> {
- sql::files(&self.conn)
+ pub fn files(
+ &self,
+ ) -> Result<SqlResults<(FileId, FilesystemEntry, Reason, bool)>, LocalGenerationError> {
+ self.db.files().map_err(LocalGenerationError::GenerationDb)
}
/// Return ids for all chunks in local generation.
- pub fn chunkids(
- &self,
- fileno: FileId,
- ) -> Result<sql::SqlResults<ChunkId>, LocalGenerationError> {
- sql::chunkids(&self.conn, fileno)
+ pub fn chunkids(&self, fileid: FileId) -> Result<SqlResults<ChunkId>, LocalGenerationError> {
+ self.db
+ .chunkids(fileid)
+ .map_err(LocalGenerationError::GenerationDb)
}
/// Return entry for a file, given its pathname.
@@ -272,17 +280,23 @@ impl LocalGeneration {
&self,
filename: &Path,
) -> Result<Option<FilesystemEntry>, LocalGenerationError> {
- sql::get_file(&self.conn, filename)
+ self.db
+ .get_file(filename)
+ .map_err(LocalGenerationError::GenerationDb)
}
/// Get the id in the local generation of a file, given its pathname.
pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, LocalGenerationError> {
- sql::get_fileno(&self.conn, filename)
+ self.db
+ .get_fileno(filename)
+ .map_err(LocalGenerationError::GenerationDb)
}
/// Does a pathname refer to a cache directory?
pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, LocalGenerationError> {
- sql::is_cachedir_tag(&self.conn, filename)
+ self.db
+ .is_cachedir_tag(filename)
+ .map_err(LocalGenerationError::GenerationDb)
}
}
@@ -393,278 +407,6 @@ mod test_schema {
}
}
-mod sql {
- use super::BackedUpFile;
- use super::FileId;
- use super::LocalGenerationError;
- use crate::backup_reason::Reason;
- use crate::chunkid::ChunkId;
- use crate::fsentry::FilesystemEntry;
- use crate::generation::SCHEMA_MAJOR;
- use crate::generation::SCHEMA_MINOR;
- use log::debug;
- use rusqlite::{params, Connection, OpenFlags, Row, Statement, Transaction};
- use std::collections::HashMap;
- use std::os::unix::ffi::OsStrExt;
- use std::path::Path;
-
- /// Create a new database in a file.
- pub fn create_db(filename: &Path) -> Result<Connection, LocalGenerationError> {
- let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE;
- let conn = Connection::open_with_flags(filename, flags)?;
- conn.execute("CREATE TABLE meta (key TEXT, value TEXT)", params![])?;
- init_meta(&conn)?;
- conn.execute(
- "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT, is_cachedir_tag BOOLEAN)",
- params![],
- )?;
- conn.execute(
- "CREATE TABLE chunks (fileno INTEGER, chunkid TEXT)",
- params![],
- )?;
- conn.execute("CREATE INDEX filenames ON files (filename)", params![])?;
- conn.execute("CREATE INDEX filenos ON chunks (fileno)", params![])?;
- conn.pragma_update(None, "journal_mode", &"WAL")?;
- Ok(conn)
- }
-
- fn init_meta(conn: &Connection) -> Result<(), LocalGenerationError> {
- conn.execute(
- "INSERT INTO meta (key, value) VALUES (?1, ?2)",
- params!["schema_version_major", SCHEMA_MAJOR],
- )?;
- conn.execute(
- "INSERT INTO meta (key, value) VALUES (?1, ?2)",
- params!["schema_version_minor", SCHEMA_MINOR],
- )?;
- Ok(())
- }
-
- /// Open an existing database in a file.
- pub fn open_db(filename: &Path) -> Result<Connection, LocalGenerationError> {
- let flags = OpenFlags::SQLITE_OPEN_READ_WRITE;
- let conn = Connection::open_with_flags(filename, flags)?;
- conn.pragma_update(None, "journal_mode", &"WAL")?;
- Ok(conn)
- }
-
- /// Return generation metadata from a database.
- pub fn meta(conn: &Connection) -> Result<HashMap<String, String>, LocalGenerationError> {
- let mut stmt = conn.prepare("SELECT key, value FROM meta")?;
- let iter = stmt.query_map(params![], row_to_key_value)?;
- let mut map = HashMap::new();
- for r in iter {
- let (key, value) = r?;
- map.insert(key, value);
- }
- Ok(map)
- }
-
- fn row_to_key_value(row: &Row) -> rusqlite::Result<(String, String)> {
- let key: String = row.get("key")?;
- let value: String = row.get("value")?;
- Ok((key, value))
- }
-
- /// Insert one file system entry into the database.
- pub fn insert_one(
- t: &Transaction,
- e: FilesystemEntry,
- fileno: FileId,
- ids: &[ChunkId],
- reason: Reason,
- is_cachedir_tag: bool,
- ) -> Result<(), LocalGenerationError> {
- let json = serde_json::to_string(&e)?;
- t.execute(
- "INSERT INTO files (fileno, filename, json, reason, is_cachedir_tag) VALUES (?1, ?2, ?3, ?4, ?5)",
- params![fileno, path_into_blob(&e.pathbuf()), &json, reason, is_cachedir_tag,],
- )?;
- for id in ids {
- t.execute(
- "INSERT INTO chunks (fileno, chunkid) VALUES (?1, ?2)",
- params![fileno, id],
- )?;
- }
- Ok(())
- }
-
- fn path_into_blob(path: &Path) -> Vec<u8> {
- path.as_os_str().as_bytes().to_vec()
- }
-
- /// Parse an SQL query result row.
- pub fn row_to_entry(row: &Row) -> rusqlite::Result<(FileId, String, String)> {
- let fileno: FileId = row.get("fileno")?;
- let json: String = row.get("json")?;
- let reason: String = row.get("reason")?;
- Ok((fileno, json, reason))
- }
-
- /// Count number of file system entries.
- pub fn file_count(conn: &Connection) -> Result<FileId, LocalGenerationError> {
- let mut stmt = conn.prepare("SELECT count(*) FROM files")?;
- let mut iter = stmt.query_map(params![], |row| row.get(0))?;
- let count = iter.next().expect("SQL count result (1)");
- let count = count?;
- Ok(count)
- }
-
- // A pointer to a "fallible iterator" over values of type `T`, which is to say it's an iterator
- // over values of type `Result<T, LocalGenerationError>`. The iterator is only valid for the
- // lifetime 'stmt.
- //
- // The fact that it's a pointer (`Box<dyn ...>`) means we don't care what the actual type of
- // the iterator is, and who produces it.
- type SqlResultsIterator<'stmt, T> =
- Box<dyn Iterator<Item = Result<T, LocalGenerationError>> + 'stmt>;
-
- // A pointer to a function which, when called on a prepared SQLite statement, would create
- // a "fallible iterator" over values of type `ItemT`. (See above for an explanation of what
- // a "fallible iterator" is.)
- //
- // The iterator is only valid for the lifetime of the associated SQLite statement; we
- // call this lifetime 'stmt, and use it both on the reference and the returned iterator.
- //
- // Now we're in a pickle: all named lifetimes have to be declared _somewhere_, but we can't add
- // 'stmt to the signature of `CreateIterFn` because then we'll have to specify it when we
- // define the function. Obviously, at that point we won't yet have a `Statement`, and thus we
- // would have no idea what its lifetime is going to be. So we can't put the 'stmt lifetime into
- // the signature of `CreateIterFn`.
- //
- // That's what `for<'stmt>` is for. This is a so-called ["higher-rank trait bound"][hrtb], and
- // it enables us to say that a function is valid for *some* lifetime 'stmt that we pass into it
- // at the call site. It lets Rust continue to track lifetimes even though `CreateIterFn`
- // interferes by "hiding" the 'stmt lifetime from its signature.
- //
- // [hrtb]: https://doc.rust-lang.org/nomicon/hrtb.html
- type CreateIterFn<'conn, ItemT> = Box<
- dyn for<'stmt> Fn(
- &'stmt mut Statement<'conn>,
- )
- -> Result<SqlResultsIterator<'stmt, ItemT>, LocalGenerationError>,
- >;
-
- /// Iterator of SQL results.
- pub struct SqlResults<'conn, ItemT> {
- stmt: Statement<'conn>,
- create_iter: CreateIterFn<'conn, ItemT>,
- }
-
- impl<'conn, ItemT> SqlResults<'conn, ItemT> {
- fn new(
- conn: &'conn Connection,
- statement: &str,
- create_iter: CreateIterFn<'conn, ItemT>,
- ) -> Result<Self, LocalGenerationError> {
- let stmt = conn.prepare(statement)?;
- Ok(Self { stmt, create_iter })
- }
-
- /// Create an iterator over results.
- pub fn iter(&'_ mut self) -> Result<SqlResultsIterator<'_, ItemT>, LocalGenerationError> {
- (self.create_iter)(&mut self.stmt)
- }
- }
-
- /// Return all file system entries in database.
- pub fn files(conn: &Connection) -> Result<SqlResults<BackedUpFile>, LocalGenerationError> {
- SqlResults::new(
- conn,
- "SELECT * FROM files",
- Box::new(|stmt| {
- let iter = stmt.query_map(params![], row_to_entry)?;
- let iter = iter.map(|x| match x {
- Ok((fileno, json, reason)) => serde_json::from_str(&json)
- .map(|entry| BackedUpFile::new(fileno, entry, &reason))
- .map_err(|e| e.into()),
- Err(e) => Err(e.into()),
- });
- Ok(Box::new(iter))
- }),
- )
- }
-
- /// Return all chunk ids in database.
- pub fn chunkids(
- conn: &Connection,
- fileno: FileId,
- ) -> Result<SqlResults<ChunkId>, LocalGenerationError> {
- SqlResults::new(
- conn,
- "SELECT chunkid FROM chunks WHERE fileno = ?1",
- Box::new(move |stmt| {
- let iter = stmt.query_map(params![fileno], |row| row.get(0))?;
- let iter = iter.map(|x| {
- let fileno: String = x?;
- Ok(ChunkId::from(&fileno))
- });
- Ok(Box::new(iter))
- }),
- )
- }
-
- /// Get a file's information given its path.
- pub fn get_file(
- conn: &Connection,
- filename: &Path,
- ) -> Result<Option<FilesystemEntry>, LocalGenerationError> {
- match get_file_and_fileno(conn, filename)? {
- None => Ok(None),
- Some((_, e, _)) => Ok(Some(e)),
- }
- }
-
- /// Get a file's information given its id in the database.
- pub fn get_fileno(
- conn: &Connection,
- filename: &Path,
- ) -> Result<Option<FileId>, LocalGenerationError> {
- match get_file_and_fileno(conn, filename)? {
- None => Ok(None),
- Some((id, _, _)) => Ok(Some(id)),
- }
- }
-
- fn get_file_and_fileno(
- conn: &Connection,
- filename: &Path,
- ) -> Result<Option<(FileId, FilesystemEntry, String)>, LocalGenerationError> {
- let mut stmt = conn.prepare("SELECT * FROM files WHERE filename = ?1")?;
- let mut iter = stmt.query_map(params![path_into_blob(filename)], row_to_entry)?;
- match iter.next() {
- None => Ok(None),
- Some(Err(e)) => {
- debug!("database lookup error: {}", e);
- Err(e.into())
- }
- Some(Ok((fileno, json, reason))) => {
- let entry = serde_json::from_str(&json)?;
- if iter.next() == None {
- Ok(Some((fileno, entry, reason)))
- } else {
- debug!("too many files in file lookup");
- Err(LocalGenerationError::TooManyFiles(filename.to_path_buf()))
- }
- }
- }
- }
-
- /// Does a path refer to a cache directory?
- pub fn is_cachedir_tag(
- conn: &Connection,
- filename: &Path,
- ) -> Result<bool, LocalGenerationError> {
- let mut stmt = conn.prepare("SELECT is_cachedir_tag FROM files WHERE filename = ?1")?;
- let mut iter = stmt.query_map(params![path_into_blob(filename)], |row| row.get(0))?;
- match iter.next() {
- // File is missing, so it's definitely not a CACHEDIR.TAG
- None => Ok(false),
- Some(result) => Ok(result?),
- }
- }
-}
-
#[cfg(test)]
mod test {
use super::{LocalGeneration, NascentGeneration};
@@ -689,7 +431,7 @@ mod test {
use crate::{
backup_reason::Reason, backup_run::FsEntryBackupOutcome, fsentry::FilesystemEntry,
};
- use std::{fs::metadata, mem::drop, path::Path};
+ use std::{fs::metadata, path::Path};
// Create a `Metadata` structure to pass to other functions (we don't care about the
// contents)
@@ -704,16 +446,17 @@ mod test {
let tag_path2 = Path::new("/another_dir/a_tag");
let mut gen = NascentGeneration::create(&dbfile).unwrap();
+ let mut cache = users::UsersCache::new();
gen.insert(
- FilesystemEntry::from_metadata(nontag_path1, &metadata).unwrap(),
+ FilesystemEntry::from_metadata(nontag_path1, &metadata, &mut cache).unwrap(),
&[],
Reason::IsNew,
false,
)
.unwrap();
gen.insert(
- FilesystemEntry::from_metadata(tag_path1, &metadata).unwrap(),
+ FilesystemEntry::from_metadata(tag_path1, &metadata, &mut cache).unwrap(),
&[],
Reason::IsNew,
true,
@@ -722,13 +465,13 @@ mod test {
let entries = vec![
FsEntryBackupOutcome {
- entry: FilesystemEntry::from_metadata(nontag_path2, &metadata).unwrap(),
+ entry: FilesystemEntry::from_metadata(nontag_path2, &metadata, &mut cache).unwrap(),
ids: vec![],
reason: Reason::IsNew,
is_cachedir_tag: false,
},
FsEntryBackupOutcome {
- entry: FilesystemEntry::from_metadata(tag_path2, &metadata).unwrap(),
+ entry: FilesystemEntry::from_metadata(tag_path2, &metadata, &mut cache).unwrap(),
ids: vec![],
reason: Reason::IsNew,
is_cachedir_tag: true,
@@ -740,7 +483,7 @@ mod test {
.unwrap();
}
- drop(gen);
+ gen.close().unwrap();
let gen = LocalGeneration::open(dbfile).unwrap();
assert!(!gen.is_cachedir_tag(nontag_path1).unwrap());
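
Note the change at the end of the test: with the single-transaction Database underneath, dropping a NascentGeneration no longer persists anything, so the test (and backup_run.rs above) must call close() explicitly before reopening the file as a LocalGeneration. A minimal sketch of that lifecycle, assuming file_count() on the nascent side returns the running count directly as in backup_run.rs:

    use obnam::generation::{LocalGeneration, NascentGeneration};
    use tempfile::tempdir;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let dir = tempdir()?;
        let dbfile = dir.path().join("gen.db");

        // Writing side: inserts happen inside one transaction that only
        // close() commits; a plain drop would discard them.
        let gen = NascentGeneration::create(&dbfile)?;
        let count = gen.file_count();
        gen.close()?;

        // Reading side: open() checks the schema version stored in "meta".
        let gen = LocalGeneration::open(&dbfile)?;
        assert_eq!(gen.file_count()?, count);
        Ok(())
    }
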
diff --git a/src/lib.rs b/src/lib.rs
index 8961df4..6a30334 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,8 @@ pub mod cipher;
pub mod client;
pub mod cmd;
pub mod config;
+pub mod db;
+pub mod dbgen;
pub mod engine;
pub mod error;
pub mod fsentry;
diff --git a/src/policy.rs b/src/policy.rs
index 7241f0f..bb98a48 100644
--- a/src/policy.rs
+++ b/src/policy.rs
@@ -3,7 +3,7 @@
use crate::backup_reason::Reason;
use crate::fsentry::FilesystemEntry;
use crate::generation::LocalGeneration;
-use log::{debug, warn};
+use log::warn;
/// Policy for what gets backed up.
///
@@ -59,10 +59,6 @@ impl BackupPolicy {
Reason::GenerationLookupError
}
};
- debug!(
- "needs_backup: file {:?}: policy decision: {}",
- new_name, reason
- );
reason
}
}