From dd8ac8901cd15945bce8d1a072abbcbdfe6d5a83 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Thu, 3 Mar 2022 18:53:53 +0200
Subject: feat! use Rust 2021 to benefit from changes to partial borrowing

Sponsored-by: author
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 17535db..3d69988 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 name = "obnam"
 version = "0.7.0"
 authors = ["Lars Wirzenius "]
-edition = "2018"
+edition = "2021"
 description = "a backup program"
 license = "AGPL-3.0-or-later"
 homepage = "https://obnam.org/"
-- 
cgit v1.2.1


From 83b83530c05e23945cfe5a11a2125c4d93d40a93 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sat, 26 Feb 2022 07:18:46 +0200
Subject: refactor: use FileId instead of raw type

This is clearer and less error prone.

Sponsored-by: author
---
 src/backup_run.rs  | 3 ++-
 src/cmd/backup.rs  | 3 ++-
 src/cmd/restore.rs | 9 +++++----
 src/generation.rs  | 6 ++----
 src/lib.rs         | 2 ++
 5 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/backup_run.rs b/src/backup_run.rs
index 0b816bf..464179b 100644
--- a/src/backup_run.rs
+++ b/src/backup_run.rs
@@ -7,6 +7,7 @@ use crate::chunker::{ChunkerError, FileChunks};
 use crate::chunkid::ChunkId;
 use crate::client::{BackupClient, ClientError};
 use crate::config::ClientConfig;
+use crate::dbgen::FileId;
 use crate::error::ObnamError;
 use crate::fsentry::{FilesystemEntry, FilesystemKind};
 use crate::fsiter::{AnnotatedFsEntry, FsIterError, FsIterator};
@@ -84,7 +85,7 @@ struct OneRootBackupOutcome {
 #[derive(Debug)]
 pub struct RootsBackupOutcome {
     /// The number of backed up files.
-    pub files_count: i64,
+    pub files_count: FileId,
     /// The errors encountered while backing up files.
     pub warnings: Vec<BackupError>,
     /// CACHEDIR.TAG files that aren't present in a previous generation.
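The value of the FileId change is easiest to see in isolation. Here is a
minimal sketch of the idea; it assumes only the alias this series
introduces (src/dbgen.rs below defines `pub type FileId = u64;`), and the
function is a simplified stand-in for report_stats() in src/cmd/backup.rs:

    // A named alias says what the integer means and gives one place
    // to change the underlying representation later.
    type FileId = u64;

    // Call sites now document themselves; a raw i64 said nothing
    // about what was being counted.
    fn report_stats(file_count: FileId, num_warnings: usize) {
        println!("backed up {} files; {} warnings", file_count, num_warnings);
    }

    fn main() {
        report_stats(42, 0);
    }
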
diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs
index 92b0f40..e4569e8 100644
--- a/src/cmd/backup.rs
+++ b/src/cmd/backup.rs
@@ -3,6 +3,7 @@
 use crate::backup_run::BackupRun;
 use crate::client::BackupClient;
 use crate::config::ClientConfig;
+use crate::dbgen::FileId;
 use crate::error::ObnamError;
 use crate::generation::GenId;
@@ -76,7 +77,7 @@ impl Backup {
 
 fn report_stats(
     runtime: &SystemTime,
-    file_count: i64,
+    file_count: FileId,
     gen_id: &GenId,
     num_warnings: usize,
 ) -> Result<(), ObnamError> {
diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs
index 7b3d95e..983efbb 100644
--- a/src/cmd/restore.rs
+++ b/src/cmd/restore.rs
@@ -3,6 +3,7 @@
 use crate::backup_reason::Reason;
 use crate::client::{BackupClient, ClientError};
 use crate::config::ClientConfig;
+use crate::dbgen::FileId;
 use crate::error::ObnamError;
 use crate::fsentry::{FilesystemEntry, FilesystemKind};
 use crate::generation::{LocalGeneration, LocalGenerationError};
@@ -132,7 +133,7 @@ pub enum RestoreError {
 async fn restore_generation(
     client: &BackupClient,
     gen: &LocalGeneration,
-    fileid: i64,
+    fileid: FileId,
     entry: &FilesystemEntry,
     to: &Path,
     progress: &ProgressBar,
@@ -185,7 +186,7 @@ async fn restore_regular(
     client: &BackupClient,
     gen: &LocalGeneration,
     path: &Path,
-    fileid: i64,
+    fileid: FileId,
     entry: &FilesystemEntry,
 ) -> Result<(), RestoreError> {
     debug!("restoring regular {}", path.display());
@@ -293,9 +294,9 @@ fn path_to_cstring(path: &Path) -> CString {
     CString::new(path).unwrap()
 }
 
-fn create_progress_bar(file_count: i64, verbose: bool) -> ProgressBar {
+fn create_progress_bar(file_count: FileId, verbose: bool) -> ProgressBar {
     let progress = if verbose {
-        ProgressBar::new(file_count as u64)
+        ProgressBar::new(file_count)
     } else {
         ProgressBar::hidden()
     };
diff --git a/src/generation.rs b/src/generation.rs
index 454bb50..2a886ad 100644
--- a/src/generation.rs
+++ b/src/generation.rs
@@ -2,6 +2,7 @@
 
 use crate::backup_reason::Reason;
 use crate::chunkid::ChunkId;
+use crate::dbgen::FileId;
 use crate::fsentry::FilesystemEntry;
 use rusqlite::Connection;
 use serde::Serialize;
@@ -15,9 +16,6 @@ const SCHEMA_MAJOR: u32 = 0;
 /// Current generation database schema minor version.
 const SCHEMA_MINOR: u32 = 0;
 
-/// An identifier for a file in a generation.
-type FileId = i64;
-
 /// An identifier for a generation.
 #[derive(Debug, Clone)]
 pub struct GenId {
@@ -250,7 +248,7 @@ impl LocalGeneration {
     }
 
     /// How many files are there in the local generation?
-    pub fn file_count(&self) -> Result<i64, LocalGenerationError> {
+    pub fn file_count(&self) -> Result<FileId, LocalGenerationError> {
         sql::file_count(&self.conn)
     }
diff --git a/src/lib.rs b/src/lib.rs
index 8961df4..6a30334 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,8 @@ pub mod cipher;
 pub mod client;
 pub mod cmd;
 pub mod config;
+pub mod db;
+pub mod dbgen;
 pub mod engine;
 pub mod error;
 pub mod fsentry;
-- 
cgit v1.2.1


From 4a98c9690801bac2821fe71aaefe63c1376f6256 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Fri, 25 Feb 2022 11:24:18 +0200
Subject: refactor: add a low level SQLite wrapper

This makes the code clearer and allows for catching more errors, albeit
at runtime, such as using the wrong column name.

Sponsored-by: author
---
 src/db.rs    | 629 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/error.rs |   5 +
 2 files changed, 634 insertions(+)
 create mode 100644 src/db.rs

diff --git a/src/db.rs b/src/db.rs
new file mode 100644
index 0000000..b4de399
--- /dev/null
+++ b/src/db.rs
@@ -0,0 +1,629 @@
+//! A database abstraction around SQLite for Obnam.
+//!
+//! This abstraction provides the bare minimum that Obnam needs, while
+//! trying to be as performant as possible, especially for inserting
+//! rows. Only data types needed by Obnam are supported.
+//!
+//! Note that this abstraction is entirely synchronous. This is for
+//! simplicity, as SQLite only allows one write at a time.
+
+use crate::fsentry::FilesystemEntry;
+use rusqlite::{params, types::ToSqlOutput, CachedStatement, Connection, OpenFlags, Row, ToSql};
+use std::collections::HashSet;
+use std::convert::TryFrom;
+use std::path::{Path, PathBuf};
+
+/// A database.
+pub struct Database {
+    conn: Connection,
+}
+
+impl Database {
+    /// Create a new database file for an empty database.
+    ///
+    /// The database can be written to.
+    pub fn create<P: AsRef<Path>>(filename: P) -> Result<Self, DatabaseError> {
+        if filename.as_ref().exists() {
+            return Err(DatabaseError::Exists(filename.as_ref().to_path_buf()));
+        }
+        let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE;
+        let conn = Connection::open_with_flags(filename, flags)?;
+        conn.execute("BEGIN", params![])?;
+        Ok(Self { conn })
+    }
+
+    /// Open an existing database file in read-only mode.
+    pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, DatabaseError> {
+        let flags = OpenFlags::SQLITE_OPEN_READ_ONLY;
+        let conn = Connection::open_with_flags(filename, flags)?;
+        Ok(Self { conn })
+    }
+
+    /// Close an open database, committing any changes to disk.
+    pub fn close(self) -> Result<(), DatabaseError> {
+        self.conn.execute("COMMIT", params![])?;
+        self.conn
+            .close()
+            .map_err(|(_, err)| DatabaseError::Rusqlite(err))?;
+        Ok(())
+    }
+
+    /// Create a table in the database.
+    pub fn create_table(&self, table: &Table) -> Result<(), DatabaseError> {
+        let sql = sql_statement::create_table(table);
+        self.conn.execute(&sql, params![])?;
+        Ok(())
+    }
+
+    /// Create an index in the database.
+    pub fn create_index(
+        &self,
+        name: &str,
+        table: &Table,
+        column: &str,
+    ) -> Result<(), DatabaseError> {
+        let sql = sql_statement::create_index(name, table, column);
+        self.conn.execute(&sql, params![])?;
+        Ok(())
+    }
+
+    /// Insert a row in a table.
+    pub fn insert(&mut self, table: &Table, values: &[Value]) -> Result<(), DatabaseError> {
+        let mut stmt = self.conn.prepare_cached(table.insert())?;
+        assert!(table.has_columns(values));
+        // The ToSql trait implementation for Obnam values can't ever
+        // fail, so we don't handle the error case in the parameter
+        // creation below.
+        stmt.execute(rusqlite::params_from_iter(values.iter().map(|v| {
+            v.to_sql()
+                .expect("conversion of Obnam value to SQLite value failed unexpectedly")
+        })))?;
+        Ok(())
+    }
+
+    /// Return an iterator for all rows in a table.
+    pub fn all_rows<T>(
+        &self,
+        table: &Table,
+        rowfunc: &'static dyn Fn(&Row) -> Result<T, rusqlite::Error>,
+    ) -> Result<SqlResults<T>, DatabaseError> {
+        let sql = sql_statement::select_all_rows(table);
+        SqlResults::new(
+            &self.conn,
+            &sql,
+            None,
+            Box::new(|stmt, _| {
+                let iter = stmt.query_map(params![], |row| rowfunc(row))?;
+                let iter = iter.map(|x| match x {
+                    Ok(t) => Ok(t),
+                    Err(e) => Err(DatabaseError::Rusqlite(e)),
+                });
+                Ok(Box::new(iter))
+            }),
+        )
+    }
+
+    /// Return rows that have a given value in a given column.
+    ///
+    /// This is simplistic, but for Obnam, it provides all the SQL
+    /// SELECT ... WHERE that's needed, and there's no point in making
+    /// this more generic than is needed.
+    pub fn some_rows<T>(
+        &self,
+        table: &Table,
+        value: &Value,
+        rowfunc: &'static dyn Fn(&Row) -> Result<T, rusqlite::Error>,
+    ) -> Result<SqlResults<T>, DatabaseError> {
+        assert!(table.has_column(value));
+        let sql = sql_statement::select_some_rows(table, value.name());
+        SqlResults::new(
+            &self.conn,
+            &sql,
+            Some(OwnedValue::from(value)),
+            Box::new(|stmt, value| {
+                let iter = stmt.query_map(params![value], |row| rowfunc(row))?;
+                let iter = iter.map(|x| match x {
+                    Ok(t) => Ok(t),
+                    Err(e) => Err(DatabaseError::Rusqlite(e)),
+                });
+                Ok(Box::new(iter))
+            }),
+        )
+    }
+}
+
+/// Possible errors from a database.
+#[derive(Debug, thiserror::Error)]
+pub enum DatabaseError {
+    /// An error from the rusqlite crate.
+    #[error(transparent)]
+    Rusqlite(#[from] rusqlite::Error),
+
+    /// The database being created already exists.
+    #[error("Database {0} already exists")]
+    Exists(PathBuf),
+}
+
+// A pointer to a "fallible iterator" over values of type `T`, which is to say it's an iterator
+// over values of type `Result<T, DatabaseError>`. The iterator is only valid for the
+// lifetime 'stmt.
+//
+// The fact that it's a pointer (`Box`) means we don't care what the actual type of
+// the iterator is, and who produces it.
+type SqlResultsIterator<'stmt, T> = Box<dyn Iterator<Item = Result<T, DatabaseError>> + 'stmt>;
+
+// A pointer to a function which, when called on a prepared SQLite statement, would create
+// a "fallible iterator" over values of type `ItemT`. (See above for an explanation of what
+// a "fallible iterator" is.)
+//
+// The iterator is only valid for the lifetime of the associated SQLite statement; we
+// call this lifetime 'stmt, and use it both on the reference and the returned iterator.
+//
+// Now we're in a pickle: all named lifetimes have to be declared _somewhere_, but we can't add
+// 'stmt to the signature of `CreateIterFn` because then we'll have to specify it when we
+// define the function. Obviously, at that point we won't yet have a `Statement`, and thus we
+// would have no idea what its lifetime is going to be. So we can't put the 'stmt lifetime into
+// the signature of `CreateIterFn`.
+//
+// That's what `for<'stmt>` is for. This is a so-called ["higher-rank trait bound"][hrtb], and
+// it enables us to say that a function is valid for *some* lifetime 'stmt that we pass into it
+// at the call site. It lets Rust continue to track lifetimes even though `CreateIterFn`
+// interferes by "hiding" the 'stmt lifetime from its signature.
+//
+// [hrtb]: https://doc.rust-lang.org/nomicon/hrtb.html
+type CreateIterFn<'conn, ItemT> = Box<
+    dyn for<'stmt> Fn(
+        &'stmt mut CachedStatement<'conn>,
+        &Option<OwnedValue>,
+    ) -> Result<SqlResultsIterator<'stmt, ItemT>, DatabaseError>,
+>;
+
+/// An iterator over rows from a query.
+pub struct SqlResults<'conn, ItemT> {
+    stmt: CachedStatement<'conn>,
+    value: Option<OwnedValue>,
+    create_iter: CreateIterFn<'conn, ItemT>,
+}
+
+impl<'conn, ItemT> SqlResults<'conn, ItemT> {
+    fn new(
+        conn: &'conn Connection,
+        statement: &str,
+        value: Option<OwnedValue>,
+        create_iter: CreateIterFn<'conn, ItemT>,
+    ) -> Result<Self, DatabaseError> {
+        let stmt = conn.prepare_cached(statement)?;
+        Ok(Self {
+            stmt,
+            value,
+            create_iter,
+        })
+    }
+
+    /// Create an iterator over results.
+    pub fn iter(&'_ mut self) -> Result<SqlResultsIterator<'_, ItemT>, DatabaseError> {
+        (self.create_iter)(&mut self.stmt, &self.value)
+    }
+}
+
+/// Describe a table in a database.
+pub struct Table {
+    table: String,
+    columns: Vec<Column>,
+    insert: Option<String>,
+    column_names: HashSet<String>,
+}
+
+impl Table {
+    /// Create a new table description without columns.
+    ///
+    /// The table description is not "built". You must add columns and
+    /// then call the [`build`] method.
+ pub fn new(table: &str) -> Self { + Self { + table: table.to_string(), + columns: vec![], + insert: None, + column_names: HashSet::new(), + } + } + + /// Append a column. + pub fn column(mut self, column: Column) -> Self { + self.column_names.insert(column.name().to_string()); + self.columns.push(column); + self + } + + /// Finish building the table description. + pub fn build(mut self) -> Self { + assert!(self.insert.is_none()); + self.insert = Some(sql_statement::insert(&self)); + self + } + + fn has_columns(&self, values: &[Value]) -> bool { + assert!(self.insert.is_some()); + for v in values.iter() { + if !self.column_names.contains(v.name()) { + return false; + } + } + true + } + + fn has_column(&self, value: &Value) -> bool { + assert!(self.insert.is_some()); + self.column_names.contains(value.name()) + } + + fn insert(&self) -> &str { + assert!(self.insert.is_some()); + self.insert.as_ref().unwrap() + } + + /// What is the name of the table? + pub fn name(&self) -> &str { + &self.table + } + + /// How many columns does the table have? + pub fn num_columns(&self) -> usize { + self.columns.len() + } + + /// What are the names of the columns in the table? + pub fn column_names(&self) -> impl Iterator { + self.columns.iter().map(|c| c.name()) + } + + /// Return SQL column definitions for the table. + pub fn column_definitions(&self) -> String { + let mut ret = String::new(); + for c in self.columns.iter() { + if !ret.is_empty() { + ret.push(','); + } + ret.push_str(c.name()); + ret.push(' '); + ret.push_str(c.typename()); + } + ret + } +} + +/// A column in a table description. +pub enum Column { + /// An integer primary key. + PrimaryKey(String), + /// An integer. + Int(String), + /// A text string. + Text(String), + /// A binary string. + Blob(String), + /// A boolean. + Bool(String), +} + +impl Column { + fn name(&self) -> &str { + match self { + Self::PrimaryKey(name) => name, + Self::Int(name) => name, + Self::Text(name) => name, + Self::Blob(name) => name, + Self::Bool(name) => name, + } + } + + fn typename(&self) -> &str { + match self { + Self::PrimaryKey(_) => "INTEGER PRIMARY KEY", + Self::Int(_) => "INTEGER", + Self::Text(_) => "TEXT", + Self::Blob(_) => "BLOB", + Self::Bool(_) => "BOOLEAN", + } + } + + /// Create an integer primary key column. + pub fn primary_key(name: &str) -> Self { + Self::PrimaryKey(name.to_string()) + } + + /// Create an integer column. + pub fn int(name: &str) -> Self { + Self::Int(name.to_string()) + } + + /// Create a text string column. + pub fn text(name: &str) -> Self { + Self::Text(name.to_string()) + } + + /// Create a binary string column. + pub fn blob(name: &str) -> Self { + Self::Blob(name.to_string()) + } + + /// Create a boolean column. + pub fn bool(name: &str) -> Self { + Self::Bool(name.to_string()) + } +} + +/// A value in a named column. +#[derive(Debug)] +pub enum Value<'a> { + /// An integer primary key. + PrimaryKey(&'a str, u64), + /// An integer. + Int(&'a str, u64), + /// A text string. + Text(&'a str, &'a str), + /// A binary string. + Blob(&'a str, &'a [u8]), + /// A boolean. + Bool(&'a str, bool), +} + +impl<'a> Value<'a> { + /// What column should store this value? + pub fn name(&self) -> &str { + match self { + Self::PrimaryKey(name, _) => name, + Self::Int(name, _) => name, + Self::Text(name, _) => name, + Self::Blob(name, _) => name, + Self::Bool(name, _) => name, + } + } + + /// Create an integer primary key value. 
+ pub fn primary_key(name: &'a str, value: u64) -> Self { + Self::PrimaryKey(name, value) + } + + /// Create an integer value. + pub fn int(name: &'a str, value: u64) -> Self { + Self::Int(name, value) + } + + /// Create a text string value. + pub fn text(name: &'a str, value: &'a str) -> Self { + Self::Text(name, value) + } + + /// Create a binary string value. + pub fn blob(name: &'a str, value: &'a [u8]) -> Self { + Self::Blob(name, value) + } + + /// Create a boolean value. + pub fn bool(name: &'a str, value: bool) -> Self { + Self::Bool(name, value) + } +} + +impl<'a> ToSql for Value<'a> { + // The trait defines to_sql to return a Result. However, for our + // particular case, to_sql can't ever fail. We only store values + // in types for which conversion always succeeds: integer, + // boolean, text, and blob. _For us_, the caller need never worry + // that the conversion fails, but we can't express that in the + // type system. + fn to_sql(&self) -> Result { + use rusqlite::types::ValueRef; + let v = match self { + Self::PrimaryKey(_, v) => ValueRef::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Int(_, v) => ValueRef::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Bool(_, v) => ValueRef::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Text(_, v) => ValueRef::Text(v.as_ref()), + Self::Blob(_, v) => ValueRef::Blob(v), + }; + Ok(ToSqlOutput::Borrowed(v)) + } +} + +/// Like a Value, but owns the data. +pub enum OwnedValue { + /// An integer primary key. + PrimaryKey(String, u64), + /// An integer. + Int(String, u64), + /// A text string. + Text(String, String), + /// A binary string. + Blob(String, Vec), + /// A boolean. 
+ Bool(String, bool), +} + +impl From<&Value<'_>> for OwnedValue { + fn from(v: &Value) -> Self { + match *v { + Value::PrimaryKey(name, v) => Self::PrimaryKey(name.to_string(), v), + Value::Int(name, v) => Self::Int(name.to_string(), v), + Value::Text(name, v) => Self::Text(name.to_string(), v.to_string()), + Value::Blob(name, v) => Self::Blob(name.to_string(), v.to_vec()), + Value::Bool(name, v) => Self::Bool(name.to_string(), v), + } + } +} + +impl ToSql for OwnedValue { + fn to_sql(&self) -> rusqlite::Result { + use rusqlite::types::Value; + let v = match self { + Self::PrimaryKey(_, v) => Value::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Int(_, v) => Value::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Bool(_, v) => Value::Integer( + i64::try_from(*v) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?, + ), + Self::Text(_, v) => Value::Text(v.to_string()), + Self::Blob(_, v) => Value::Blob(v.to_vec()), + }; + Ok(ToSqlOutput::Owned(v)) + } +} + +impl rusqlite::types::ToSql for FilesystemEntry { + fn to_sql(&self) -> rusqlite::Result> { + let json = serde_json::to_string(self) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?; + let json = rusqlite::types::Value::Text(json); + Ok(ToSqlOutput::Owned(json)) + } +} + +mod sql_statement { + use super::Table; + + pub fn create_table(table: &Table) -> String { + format!( + "CREATE TABLE {} ({})", + table.name(), + table.column_definitions() + ) + } + + pub fn create_index(name: &str, table: &Table, column: &str) -> String { + format!("CREATE INDEX {} ON {} ({})", name, table.name(), column,) + } + + pub fn insert(table: &Table) -> String { + format!( + "INSERT INTO {} ({}) VALUES ({})", + table.name(), + &column_names(table), + placeholders(table.column_names().count()) + ) + } + + pub fn select_all_rows(table: &Table) -> String { + format!("SELECT * FROM {}", table.name()) + } + + pub fn select_some_rows(table: &Table, column: &str) -> String { + format!("SELECT * FROM {} WHERE {} = ?", table.name(), column) + } + + fn column_names(table: &Table) -> String { + table.column_names().collect::>().join(",") + } + + fn placeholders(num_columns: usize) -> String { + let mut s = String::new(); + for _ in 0..num_columns { + if !s.is_empty() { + s.push(','); + } + s.push('?'); + } + s + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::path::Path; + use tempfile::tempdir; + + fn get_bar(row: &rusqlite::Row) -> Result { + row.get("bar") + } + + fn table() -> Table { + Table::new("foo").column(Column::int("bar")).build() + } + + fn create_db(file: &Path) -> Database { + let table = table(); + let db = Database::create(file).unwrap(); + db.create_table(&table).unwrap(); + db + } + + fn open_db(file: &Path) -> Database { + Database::open(file).unwrap() + } + + fn insert(db: &mut Database, value: u64) { + let table = table(); + db.insert(&table, &[Value::int("bar", value)]).unwrap(); + } + + fn values(db: Database) -> Vec { + let table = table(); + let mut rows = db.all_rows(&table, &get_bar).unwrap(); + let iter = rows.iter().unwrap(); + let mut values = vec![]; + for x in iter { + values.push(x.unwrap()); + } + values + } + + #[test] + fn creates_db() { + let tmp = tempdir().unwrap(); + let filename = tmp.path().join("test.db"); + let db = Database::create(&filename).unwrap(); + db.close().unwrap(); + let _ = Database::open(&filename).unwrap(); + } + + 
+    #[test]
+    fn inserts_row() {
+        let tmp = tempdir().unwrap();
+        let filename = tmp.path().join("test.db");
+        let mut db = create_db(&filename);
+        insert(&mut db, 42);
+        db.close().unwrap();
+
+        let db = open_db(&filename);
+        let values = values(db);
+        assert_eq!(values, vec![42]);
+    }
+
+    #[test]
+    fn inserts_many_rows() {
+        const N: u64 = 1000;
+
+        let tmp = tempdir().unwrap();
+        let filename = tmp.path().join("test.db");
+        let mut db = create_db(&filename);
+        for i in 0..N {
+            insert(&mut db, i);
+        }
+        db.close().unwrap();
+
+        let db = open_db(&filename);
+        let values = values(db);
+        assert_eq!(values.len() as u64, N);
+
+        let mut expected = vec![];
+        for i in 0..N {
+            expected.push(i);
+        }
+        assert_eq!(values, expected);
+    }
+}
diff --git a/src/error.rs b/src/error.rs
index e8f5ee8..cf18c83 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -5,6 +5,7 @@ use crate::cipher::CipherError;
 use crate::client::ClientError;
 use crate::cmd::restore::RestoreError;
 use crate::config::ClientConfigError;
+use crate::db::DatabaseError;
 use crate::generation::{LocalGenerationError, NascentError};
 use crate::genlist::GenerationListError;
 use crate::passwords::PasswordError;
@@ -51,6 +52,10 @@ pub enum ObnamError {
     #[error(transparent)]
     LocalGenerationError(#[from] LocalGenerationError),
 
+    /// Error using a Database.
+    #[error(transparent)]
+    Database(#[from] DatabaseError),
+
     /// Error restoring a backup.
     #[error(transparent)]
     RestoreError(#[from] RestoreError),
-- 
cgit v1.2.1


From f71d15936dfe1357d771339801cef39dddc02eaa Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Fri, 25 Feb 2022 11:24:31 +0200
Subject: refactor: add a high level database abstraction for backups

Sponsored-by: author
---
 src/dbgen.rs      | 320 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/generation.rs |   8 +-
 2 files changed, 321 insertions(+), 7 deletions(-)
 create mode 100644 src/dbgen.rs

diff --git a/src/dbgen.rs b/src/dbgen.rs
new file mode 100644
index 0000000..88fe3d8
--- /dev/null
+++ b/src/dbgen.rs
@@ -0,0 +1,320 @@
+//! Database abstraction for generations.
+
+use crate::backup_reason::Reason;
+use crate::chunkid::ChunkId;
+use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value};
+use crate::fsentry::FilesystemEntry;
+use log::error;
+use std::collections::HashMap;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Path, PathBuf};
+
+/// Current generation database schema major version.
+pub const SCHEMA_MAJOR: u32 = 0;
+
+/// Current generation database schema minor version.
+pub const SCHEMA_MINOR: u32 = 0;
+
+/// An identifier for a file in a generation.
+pub type FileId = u64;
+
+/// Possible errors from using generation databases.
+#[derive(Debug, thiserror::Error)]
+pub enum GenerationDbError {
+    /// Duplicate file names.
+    #[error("Generation has more than one file with the name {0}")]
+    TooManyFiles(PathBuf),
+
+    /// No 'meta' table in generation.
+    #[error("Generation does not have a 'meta' table")]
+    NoMeta,
+
+    /// Missing row from 'meta' table.
+    #[error("Generation 'meta' table does not have a row {0}")]
+    NoMetaKey(String),
+
+    /// Bad data in 'meta' table.
+    #[error("Generation 'meta' row {0} has badly formed integer: {1}")]
+    BadMetaInteger(String, std::num::ParseIntError),
+
+    /// Local generation uses a schema version that this version of
+    /// Obnam isn't compatible with.
+ #[error("Backup is not compatible with this version of Obnam: {0}.{1}")] + Incompatible(u32, u32), + + /// Error from a database + #[error(transparent)] + Database(#[from] DatabaseError), + + /// Error from JSON. + #[error(transparent)] + SerdeJsonError(#[from] serde_json::Error), + + /// Error from I/O. + #[error(transparent)] + IoError(#[from] std::io::Error), +} + +/// A database representing a backup generation. +pub struct GenerationDb { + created: bool, + db: Database, + meta: Table, + files: Table, + chunks: Table, +} + +impl GenerationDb { + /// Create a new generation database in read/write mode. + pub fn create>(filename: P) -> Result { + let db = Database::create(filename.as_ref())?; + let mut moi = Self::new(db); + moi.created = true; + moi.create_tables()?; + Ok(moi) + } + + /// Open an existing generation database in read-only mode. + pub fn open>(filename: P) -> Result { + let db = Database::open(filename.as_ref())?; + Ok(Self::new(db)) + } + + fn new(db: Database) -> Self { + let meta = Table::new("meta") + .column(Column::text("key")) + .column(Column::text("value")) + .build(); + let files = Table::new("files") + .column(Column::primary_key("fileno")) // FIXME: rename to fileid + .column(Column::blob("filename")) + .column(Column::text("json")) + .column(Column::text("reason")) + .column(Column::bool("is_cachedir_tag")) + .build(); + let chunks = Table::new("chunks") + .column(Column::int("fileno")) // FIXME: rename to fileid + .column(Column::text("chunkid")) + .build(); + + Self { + created: false, + db, + meta, + files, + chunks, + } + } + + fn create_tables(&mut self) -> Result<(), GenerationDbError> { + self.db.create_table(&self.meta)?; + self.db.create_table(&self.files)?; + self.db.create_table(&self.chunks)?; + + self.db.insert( + &self.meta, + &[ + Value::text("key", "schema_version_major"), + Value::text("value", &format!("{}", SCHEMA_MAJOR)), + ], + )?; + self.db.insert( + &self.meta, + &[ + Value::text("key", "schema_version_minor"), + Value::text("value", &format!("{}", SCHEMA_MINOR)), + ], + )?; + + Ok(()) + } + + /// Close a database, commit any changes. + pub fn close(self) -> Result<(), GenerationDbError> { + if self.created { + self.db + .create_index("filenames_idx", &self.files, "filename")?; + self.db.create_index("fileid_idx", &self.chunks, "fileno")?; + } + self.db.close().map_err(GenerationDbError::Database) + } + + /// Return contents of "meta" table as a HashMap. + pub fn meta(&self) -> Result, GenerationDbError> { + let mut map = HashMap::new(); + let mut iter = self.db.all_rows(&self.meta, &row_to_kv)?; + for kv in iter.iter()? { + let (key, value) = kv?; + map.insert(key, value); + } + Ok(map) + } + + /// Insert a file system entry into the database. + pub fn insert( + &mut self, + e: FilesystemEntry, + fileid: FileId, + ids: &[ChunkId], + reason: Reason, + is_cachedir_tag: bool, + ) -> Result<(), GenerationDbError> { + let json = serde_json::to_string(&e)?; + self.db.insert( + &self.files, + &[ + Value::primary_key("fileno", fileid), + Value::blob("filename", &path_into_blob(&e.pathbuf())), + Value::text("json", &json), + Value::text("reason", &format!("{}", reason)), + Value::bool("is_cachedir_tag", is_cachedir_tag), + ], + )?; + for id in ids { + self.db.insert( + &self.chunks, + &[ + Value::int("fileno", fileid), + Value::text("chunkid", &format!("{}", id)), + ], + )?; + } + Ok(()) + } + + /// Count number of file system entries. 
+ pub fn file_count(&self) -> Result { + // FIXME: this needs to be done use "SELECT count(*) FROM + // files", but the Database abstraction doesn't support that + // yet. + let mut iter = self.db.all_rows(&self.files, &row_to_entry)?; + let mut count = 0; + for _ in iter.iter()? { + count += 1; + } + Ok(count) + } + + /// Does a path refer to a cache directory? + pub fn is_cachedir_tag(&self, filename: &Path) -> Result { + let filename = path_into_blob(filename); + let value = Value::blob("filename", &filename); + let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?; + let mut iter = rows.iter()?; + + if let Some(row) = iter.next() { + if iter.next().is_some() { + Ok(false) + } else { + let (_, _, _, is_cachedir_tag) = row?; + Ok(is_cachedir_tag) + } + } else { + Ok(false) + } + } + + /// Return all chunk ids in database. + pub fn chunkids(&self, fileid: FileId) -> Result, GenerationDbError> { + let fileid = Value::int("fileno", fileid); + Ok(self.db.some_rows(&self.chunks, &fileid, &row_to_chunkid)?) + } + + /// Return all chunk ids in database. + pub fn files( + &self, + ) -> Result, GenerationDbError> { + Ok(self.db.all_rows(&self.files, &row_to_fsentry)?) + } + + /// Get a file's information given its path. + pub fn get_file(&self, filename: &Path) -> Result, GenerationDbError> { + match self.get_file_and_fileno(filename)? { + None => Ok(None), + Some((_, e, _)) => Ok(Some(e)), + } + } + + /// Get a file's information given it's id in the database. + pub fn get_fileno(&self, filename: &Path) -> Result, GenerationDbError> { + match self.get_file_and_fileno(filename)? { + None => Ok(None), + Some((id, _, _)) => Ok(Some(id)), + } + } + + fn get_file_and_fileno( + &self, + filename: &Path, + ) -> Result, GenerationDbError> { + let filename_bytes = path_into_blob(filename); + let value = Value::blob("filename", &filename_bytes); + let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?; + let mut iter = rows.iter()?; + + if let Some(row) = iter.next() { + if iter.next().is_some() { + error!("too many files in file lookup"); + Err(GenerationDbError::TooManyFiles(filename.to_path_buf())) + } else { + let (fileid, ref json, ref reason, _) = row?; + let entry = serde_json::from_str(json)?; + Ok(Some((fileid, entry, reason.to_string()))) + } + } else { + Ok(None) + } + } +} + +fn row_to_kv(row: &rusqlite::Row) -> rusqlite::Result<(String, String)> { + let k = row.get("key")?; + let v = row.get("value")?; + Ok((k, v)) +} + +fn path_into_blob(path: &Path) -> Vec { + path.as_os_str().as_bytes().to_vec() +} + +fn row_to_entry(row: &rusqlite::Row) -> rusqlite::Result<(FileId, String, String, bool)> { + let fileno: FileId = row.get("fileno")?; + let json: String = row.get("json")?; + let reason: String = row.get("reason")?; + let is_cachedir_tag: bool = row.get("is_cachedir_tag")?; + Ok((fileno, json, reason, is_cachedir_tag)) +} + +fn row_to_fsentry( + row: &rusqlite::Row, +) -> rusqlite::Result<(FileId, FilesystemEntry, Reason, bool)> { + let fileno: FileId = row.get("fileno")?; + let json: String = row.get("json")?; + let entry = serde_json::from_str(&json).map_err(|err| { + rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(err)) + })?; + let reason: String = row.get("reason")?; + let reason = Reason::from(&reason); + let is_cachedir_tag: bool = row.get("is_cachedir_tag")?; + Ok((fileno, entry, reason, is_cachedir_tag)) +} + +fn row_to_chunkid(row: &rusqlite::Row) -> rusqlite::Result { + let chunkid: String = row.get("chunkid")?; + 
let chunkid = ChunkId::recreate(&chunkid); + Ok(chunkid) +} + +#[cfg(test)] +mod test { + use super::Database; + use tempfile::tempdir; + + #[test] + fn opens_previously_created_db() { + let dir = tempdir().unwrap(); + let filename = dir.path().join("test.db"); + Database::create(&filename).unwrap(); + assert!(Database::open(&filename).is_ok()); + } +} diff --git a/src/generation.rs b/src/generation.rs index 2a886ad..90ad2c7 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -2,7 +2,7 @@ use crate::backup_reason::Reason; use crate::chunkid::ChunkId; -use crate::dbgen::FileId; +use crate::dbgen::{FileId, SCHEMA_MAJOR, SCHEMA_MINOR}; use crate::fsentry::FilesystemEntry; use rusqlite::Connection; use serde::Serialize; @@ -10,12 +10,6 @@ use std::collections::HashMap; use std::fmt; use std::path::{Path, PathBuf}; -/// Current generation database schema major version. -const SCHEMA_MAJOR: u32 = 0; - -/// Current generation database schema minor version. -const SCHEMA_MINOR: u32 = 0; - /// An identifier for a generation. #[derive(Debug, Clone)] pub struct GenId { -- cgit v1.2.1 From 630ef5e8aa07545daf4c6bb5b23992cdd54c1ce2 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 3 Mar 2022 18:51:38 +0200 Subject: refactor: use new database abstraction for generations Sponsored-by: author --- src/backup_run.rs | 11 +- src/cmd/backup.rs | 15 ++- src/cmd/list_files.rs | 4 +- src/cmd/restore.rs | 27 ++-- src/cmd/show_gen.rs | 3 +- src/dbgen.rs | 17 ++- src/generation.rs | 352 ++++++++------------------------------------------ 7 files changed, 94 insertions(+), 335 deletions(-) diff --git a/src/backup_run.rs b/src/backup_run.rs index 464179b..9454625 100644 --- a/src/backup_run.rs +++ b/src/backup_run.rs @@ -7,6 +7,7 @@ use crate::chunker::{ChunkerError, FileChunks}; use crate::chunkid::ChunkId; use crate::client::{BackupClient, ClientError}; use crate::config::ClientConfig; +use crate::db::DatabaseError; use crate::dbgen::FileId; use crate::error::ObnamError; use crate::fsentry::{FilesystemEntry, FilesystemKind}; @@ -50,6 +51,10 @@ pub enum BackupError { #[error(transparent)] LocalGenerationError(#[from] LocalGenerationError), + /// An error using a Database. + #[error(transparent)] + Database(#[from] DatabaseError), + /// An error splitting data into chunks. #[error(transparent)] ChunkerError(#[from] ChunkerError), @@ -127,7 +132,7 @@ impl<'a> BackupRun<'a> { match genid { None => { // Create a new, empty generation. - NascentGeneration::create(oldname)?; + NascentGeneration::create(oldname)?.close()?; // Open the newly created empty generation. Ok(LocalGeneration::open(oldname)?) @@ -191,7 +196,9 @@ impl<'a> BackupRun<'a> { } } } - new.file_count() + let count = new.file_count(); + new.close()?; + count }; self.finish(); let gen_id = self.upload_nascent_generation(newpath).await?; diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index e4569e8..dae9811 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -10,7 +10,7 @@ use crate::generation::GenId; use log::info; use std::time::SystemTime; use structopt::StructOpt; -use tempfile::NamedTempFile; +use tempfile::tempdir; use tokio::runtime::Runtime; /// Make a backup. 
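The hunk below swaps the two NamedTempFile handles for plain paths inside
a single temporary directory. The likely motivation is visible in
src/db.rs above: Database::create() refuses to touch a file that already
exists, while NamedTempFile pre-creates its file. A sketch of the
pattern, reusing the old.db/new.db names from the hunk (error handling
simplified):

    use tempfile::tempdir;

    fn main() -> std::io::Result<()> {
        // One directory owns both databases; both are deleted when
        // `temp` is dropped at the end of the run.
        let temp = tempdir()?;
        let oldtemp = temp.path().join("old.db"); // not created yet
        let newtemp = temp.path().join("new.db"); // not created yet
        println!("{} {}", oldtemp.display(), newtemp.display());
        Ok(())
    }
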
@@ -30,21 +30,22 @@ impl Backup {
         let client = BackupClient::new(config)?;
         let genlist = client.list_generations().await?;
 
-        let oldtemp = NamedTempFile::new()?;
-        let newtemp = NamedTempFile::new()?;
+        let temp = tempdir()?;
+        let oldtemp = temp.path().join("old.db");
+        let newtemp = temp.path().join("new.db");
 
         let (is_incremental, outcome) = match genlist.resolve("latest") {
             Err(_) => {
                 info!("fresh backup without a previous generation");
                 let mut run = BackupRun::initial(config, &client)?;
-                let old = run.start(None, oldtemp.path()).await?;
-                (false, run.backup_roots(config, &old, newtemp.path()).await?)
+                let old = run.start(None, &oldtemp).await?;
+                (false, run.backup_roots(config, &old, &newtemp).await?)
             }
             Ok(old_id) => {
                 info!("incremental backup based on {}", old_id);
                 let mut run = BackupRun::incremental(config, &client)?;
-                let old = run.start(Some(&old_id), oldtemp.path()).await?;
-                (true, run.backup_roots(config, &old, newtemp.path()).await?)
+                let old = run.start(Some(&old_id), &oldtemp).await?;
+                (true, run.backup_roots(config, &old, &newtemp).await?)
             }
         };
 
diff --git a/src/cmd/list_files.rs b/src/cmd/list_files.rs
index 12d34b1..9126564 100644
--- a/src/cmd/list_files.rs
+++ b/src/cmd/list_files.rs
@@ -34,8 +34,8 @@ impl ListFiles {
         let gen = client.fetch_generation(&gen_id, temp.path()).await?;
 
         for file in gen.files()?.iter()? {
-            let file = file?;
-            println!("{}", format_entry(file.entry(), file.reason()));
+            let (_, entry, reason, _) = file?;
+            println!("{}", format_entry(&entry, reason));
         }
 
         Ok(())
diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs
index 983efbb..43d9679 100644
--- a/src/cmd/restore.rs
+++ b/src/cmd/restore.rs
@@ -3,6 +3,7 @@
 use crate::backup_reason::Reason;
 use crate::client::{BackupClient, ClientError};
 use crate::config::ClientConfig;
+use crate::db::DatabaseError;
 use crate::dbgen::FileId;
 use crate::error::ObnamError;
 use crate::fsentry::{FilesystemEntry, FilesystemKind};
 use crate::generation::{LocalGeneration, LocalGenerationError};
@@ -54,26 +55,16 @@ impl Restore {
         info!("restoring {} files", gen.file_count()?);
         let progress = create_progress_bar(gen.file_count()?, true);
         for file in gen.files()?.iter()? {
-            let file = file?;
-            match file.reason() {
+            let (fileno, entry, reason, _) = file?;
+            match reason {
                 Reason::FileError => (),
-                _ => {
-                    restore_generation(
-                        &client,
-                        &gen,
-                        file.fileno(),
-                        file.entry(),
-                        &self.to,
-                        &progress,
-                    )
-                    .await?
-                }
+                _ => restore_generation(&client, &gen, fileno, &entry, &self.to, &progress).await?,
             }
         }
         for file in gen.files()?.iter()? {
-            let file = file?;
-            if file.entry().is_dir() {
-                restore_directory_metadata(file.entry(), &self.to)?;
+            let (_, entry, _, _) = file?;
+            if entry.is_dir() {
+                restore_directory_metadata(&entry, &self.to)?;
             }
         }
         progress.finish();
@@ -85,6 +76,10 @@ impl Restore {
 
 /// Possible errors from restoring.
 #[derive(Debug, thiserror::Error)]
 pub enum RestoreError {
+    /// An error using a Database.
+    #[error(transparent)]
+    Database(#[from] DatabaseError),
+
     /// Failed to create a named pipe.
#[error("Could not create named pipe (FIFO) {0}")] NamedPipeCreationError(PathBuf), diff --git a/src/cmd/show_gen.rs b/src/cmd/show_gen.rs index 6c8ba19..006e0e0 100644 --- a/src/cmd/show_gen.rs +++ b/src/cmd/show_gen.rs @@ -35,8 +35,7 @@ impl ShowGeneration { let mut files = files.iter()?; let total_bytes = files.try_fold(0, |acc, file| { - file.map(|file| { - let e = file.entry(); + file.map(|(_, e, _, _)| { if e.kind() == FilesystemKind::Regular { acc + e.len() } else { diff --git a/src/dbgen.rs b/src/dbgen.rs index 88fe3d8..7e54d7d 100644 --- a/src/dbgen.rs +++ b/src/dbgen.rs @@ -197,14 +197,18 @@ impl GenerationDb { /// Does a path refer to a cache directory? pub fn is_cachedir_tag(&self, filename: &Path) -> Result { - let filename = path_into_blob(filename); - let value = Value::blob("filename", &filename); + let filename_vec = path_into_blob(filename); + let value = Value::blob("filename", &filename_vec); let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?; let mut iter = rows.iter()?; if let Some(row) = iter.next() { + // Make sure there's only one row for a given filename. A + // bug in a previous version, or a maliciously constructed + // generation, could result in there being more than one. if iter.next().is_some() { - Ok(false) + error!("too many files in file lookup"); + Err(GenerationDbError::TooManyFiles(filename.to_path_buf())) } else { let (_, _, _, is_cachedir_tag) = row?; Ok(is_cachedir_tag) @@ -220,7 +224,7 @@ impl GenerationDb { Ok(self.db.some_rows(&self.chunks, &fileid, &row_to_chunkid)?) } - /// Return all chunk ids in database. + /// Return all file descriptions in database. pub fn files( &self, ) -> Result, GenerationDbError> { @@ -235,7 +239,7 @@ impl GenerationDb { } } - /// Get a file's information given it's id in the database. + /// Get a file's information given its id in the database. pub fn get_fileno(&self, filename: &Path) -> Result, GenerationDbError> { match self.get_file_and_fileno(filename)? { None => Ok(None), @@ -253,6 +257,9 @@ impl GenerationDb { let mut iter = rows.iter()?; if let Some(row) = iter.next() { + // Make sure there's only one row for a given filename. A + // bug in a previous version, or a maliciously constructed + // generation, could result in there being more than one. if iter.next().is_some() { error!("too many files in file lookup"); Err(GenerationDbError::TooManyFiles(filename.to_path_buf())) diff --git a/src/generation.rs b/src/generation.rs index 90ad2c7..3950a0c 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -2,9 +2,9 @@ use crate::backup_reason::Reason; use crate::chunkid::ChunkId; -use crate::dbgen::{FileId, SCHEMA_MAJOR, SCHEMA_MINOR}; +use crate::db::{DatabaseError, SqlResults}; +use crate::dbgen::{FileId, GenerationDb, GenerationDbError, SCHEMA_MAJOR, SCHEMA_MINOR}; use crate::fsentry::FilesystemEntry; -use rusqlite::Connection; use serde::Serialize; use std::collections::HashMap; use std::fmt; @@ -43,7 +43,7 @@ impl fmt::Display for GenId { /// finished yet, and it's not actually on the server until the upload /// of its generation chunk. pub struct NascentGeneration { - conn: Connection, + db: GenerationDb, fileno: FileId, } @@ -58,6 +58,10 @@ pub enum NascentError { #[error(transparent)] LocalGenerationError(#[from] LocalGenerationError), + /// Error from a GenerationDb. + #[error(transparent)] + GenerationDb(#[from] GenerationDbError), + /// Error from an SQL transaction. 
#[error("SQL transaction error: {0}")] Transaction(rusqlite::Error), @@ -77,8 +81,13 @@ impl NascentGeneration { where P: AsRef, { - let conn = sql::create_db(filename.as_ref())?; - Ok(Self { conn, fileno: 0 }) + let db = GenerationDb::create(filename.as_ref())?; + Ok(Self { db, fileno: 0 }) + } + + /// Commit any changes, and close the database. + pub fn close(self) -> Result<(), NascentError> { + self.db.close().map_err(NascentError::GenerationDb) } /// How many files are there now in the nascent generation? @@ -94,10 +103,9 @@ impl NascentGeneration { reason: Reason, is_cachedir_tag: bool, ) -> Result<(), NascentError> { - let t = self.conn.transaction().map_err(NascentError::Transaction)?; self.fileno += 1; - sql::insert_one(&t, e, self.fileno, ids, reason, is_cachedir_tag)?; - t.commit().map_err(NascentError::Commit)?; + self.db + .insert(e, self.fileno, ids, reason, is_cachedir_tag)?; Ok(()) } } @@ -138,7 +146,7 @@ impl FinishedGeneration { /// This is for querying an existing generation, and other read-only /// operations. pub struct LocalGeneration { - conn: Connection, + db: GenerationDb, } /// Possible errors from using local generations. @@ -169,6 +177,14 @@ pub enum LocalGenerationError { #[error(transparent)] RusqliteError(#[from] rusqlite::Error), + /// Error from a GenerationDb. + #[error(transparent)] + GenerationDb(#[from] GenerationDbError), + + /// Error from a Database. + #[error(transparent)] + Database(#[from] DatabaseError), + /// Error from JSON. #[error(transparent)] SerdeJsonError(#[from] serde_json::Error), @@ -187,8 +203,7 @@ pub struct BackedUpFile { impl BackedUpFile { /// Create a new `BackedUpFile`. - pub fn new(fileno: FileId, entry: FilesystemEntry, reason: &str) -> Self { - let reason = Reason::from(reason); + pub fn new(fileno: FileId, entry: FilesystemEntry, reason: Reason) -> Self { Self { fileno, entry, @@ -213,8 +228,8 @@ impl BackedUpFile { } impl LocalGeneration { - fn new(conn: Connection) -> Self { - Self { conn } + fn new(db: GenerationDb) -> Self { + Self { db } } /// Open a local file as a local generation. @@ -222,8 +237,8 @@ impl LocalGeneration { where P: AsRef, { - let conn = sql::open_db(filename.as_ref())?; - let gen = Self::new(conn); + let db = GenerationDb::open(filename.as_ref())?; + let gen = Self::new(db); let schema = gen.meta()?.schema_version(); let our_schema = SchemaVersion::new(SCHEMA_MAJOR, SCHEMA_MINOR); if !our_schema.is_compatible_with(&schema) { @@ -237,26 +252,27 @@ impl LocalGeneration { /// Return generation metadata for local generation. pub fn meta(&self) -> Result { - let map = sql::meta(&self.conn)?; + let map = self.db.meta()?; GenMeta::from(map) } /// How many files are there in the local generation? pub fn file_count(&self) -> Result { - sql::file_count(&self.conn) + Ok(self.db.file_count()?) } /// Return all files in the local generation. - pub fn files(&self) -> Result, LocalGenerationError> { - sql::files(&self.conn) + pub fn files( + &self, + ) -> Result, LocalGenerationError> { + self.db.files().map_err(LocalGenerationError::GenerationDb) } /// Return ids for all chunks in local generation. - pub fn chunkids( - &self, - fileno: FileId, - ) -> Result, LocalGenerationError> { - sql::chunkids(&self.conn, fileno) + pub fn chunkids(&self, fileid: FileId) -> Result, LocalGenerationError> { + self.db + .chunkids(fileid) + .map_err(LocalGenerationError::GenerationDb) } /// Return entry for a file, given its pathname. 
@@ -264,17 +280,23 @@ impl LocalGeneration { &self, filename: &Path, ) -> Result, LocalGenerationError> { - sql::get_file(&self.conn, filename) + self.db + .get_file(filename) + .map_err(LocalGenerationError::GenerationDb) } /// Get the id in the local generation of a file, given its pathname. pub fn get_fileno(&self, filename: &Path) -> Result, LocalGenerationError> { - sql::get_fileno(&self.conn, filename) + self.db + .get_fileno(filename) + .map_err(LocalGenerationError::GenerationDb) } /// Does a pathname refer to a cache directory? pub fn is_cachedir_tag(&self, filename: &Path) -> Result { - sql::is_cachedir_tag(&self.conn, filename) + self.db + .is_cachedir_tag(filename) + .map_err(LocalGenerationError::GenerationDb) } } @@ -385,278 +407,6 @@ mod test_schema { } } -mod sql { - use super::BackedUpFile; - use super::FileId; - use super::LocalGenerationError; - use crate::backup_reason::Reason; - use crate::chunkid::ChunkId; - use crate::fsentry::FilesystemEntry; - use crate::generation::SCHEMA_MAJOR; - use crate::generation::SCHEMA_MINOR; - use log::debug; - use rusqlite::{params, Connection, OpenFlags, Row, Statement, Transaction}; - use std::collections::HashMap; - use std::os::unix::ffi::OsStrExt; - use std::path::Path; - - /// Create a new database in a file. - pub fn create_db(filename: &Path) -> Result { - let flags = OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE; - let conn = Connection::open_with_flags(filename, flags)?; - conn.execute("CREATE TABLE meta (key TEXT, value TEXT)", params![])?; - init_meta(&conn)?; - conn.execute( - "CREATE TABLE files (fileno INTEGER PRIMARY KEY, filename BLOB, json TEXT, reason TEXT, is_cachedir_tag BOOLEAN)", - params![], - )?; - conn.execute( - "CREATE TABLE chunks (fileno INTEGER, chunkid TEXT)", - params![], - )?; - conn.execute("CREATE INDEX filenames ON files (filename)", params![])?; - conn.execute("CREATE INDEX filenos ON chunks (fileno)", params![])?; - conn.pragma_update(None, "journal_mode", &"WAL")?; - Ok(conn) - } - - fn init_meta(conn: &Connection) -> Result<(), LocalGenerationError> { - conn.execute( - "INSERT INTO meta (key, value) VALUES (?1, ?2)", - params!["schema_version_major", SCHEMA_MAJOR], - )?; - conn.execute( - "INSERT INTO meta (key, value) VALUES (?1, ?2)", - params!["schema_version_minor", SCHEMA_MINOR], - )?; - Ok(()) - } - - /// Open an existing database in a file. - pub fn open_db(filename: &Path) -> Result { - let flags = OpenFlags::SQLITE_OPEN_READ_WRITE; - let conn = Connection::open_with_flags(filename, flags)?; - conn.pragma_update(None, "journal_mode", &"WAL")?; - Ok(conn) - } - - /// Return generation metadata from a database. - pub fn meta(conn: &Connection) -> Result, LocalGenerationError> { - let mut stmt = conn.prepare("SELECT key, value FROM meta")?; - let iter = stmt.query_map(params![], row_to_key_value)?; - let mut map = HashMap::new(); - for r in iter { - let (key, value) = r?; - map.insert(key, value); - } - Ok(map) - } - - fn row_to_key_value(row: &Row) -> rusqlite::Result<(String, String)> { - let key: String = row.get("key")?; - let value: String = row.get("value")?; - Ok((key, value)) - } - - /// Insert one file system entry into the database. 
- pub fn insert_one( - t: &Transaction, - e: FilesystemEntry, - fileno: FileId, - ids: &[ChunkId], - reason: Reason, - is_cachedir_tag: bool, - ) -> Result<(), LocalGenerationError> { - let json = serde_json::to_string(&e)?; - t.execute( - "INSERT INTO files (fileno, filename, json, reason, is_cachedir_tag) VALUES (?1, ?2, ?3, ?4, ?5)", - params![fileno, path_into_blob(&e.pathbuf()), &json, reason, is_cachedir_tag,], - )?; - for id in ids { - t.execute( - "INSERT INTO chunks (fileno, chunkid) VALUES (?1, ?2)", - params![fileno, id], - )?; - } - Ok(()) - } - - fn path_into_blob(path: &Path) -> Vec { - path.as_os_str().as_bytes().to_vec() - } - - /// Parse an SQL query result row. - pub fn row_to_entry(row: &Row) -> rusqlite::Result<(FileId, String, String)> { - let fileno: FileId = row.get("fileno")?; - let json: String = row.get("json")?; - let reason: String = row.get("reason")?; - Ok((fileno, json, reason)) - } - - /// Count number of file system entries. - pub fn file_count(conn: &Connection) -> Result { - let mut stmt = conn.prepare("SELECT count(*) FROM files")?; - let mut iter = stmt.query_map(params![], |row| row.get(0))?; - let count = iter.next().expect("SQL count result (1)"); - let count = count?; - Ok(count) - } - - // A pointer to a "fallible iterator" over values of type `T`, which is to say it's an iterator - // over values of type `Result`. The iterator is only valid for the - // lifetime 'stmt. - // - // The fact that it's a pointer (`Box`) means we don't care what the actual type of - // the iterator is, and who produces it. - type SqlResultsIterator<'stmt, T> = - Box> + 'stmt>; - - // A pointer to a function which, when called on a prepared SQLite statement, would create - // a "fallible iterator" over values of type `ItemT`. (See above for an explanation of what - // a "fallible iterator" is.) - // - // The iterator is only valid for the lifetime of the associated SQLite statement; we - // call this lifetime 'stmt, and use it both both on the reference and the returned Now. - // - // we iterator're in a pickle: all named lifetimes have to be declared _somewhere_, but we can't add - // 'stmt to the signature of `CreateIterFn` because then we'll have to specify it when we - // define the function. Obviously, at that point we won't yet have a `Statement`, and thus we - // would have no idea what its lifetime is going to be. So we can't put the 'stmt lifetime into - // the signature of `CreateIterFn`. - // - // That's what `for<'stmt>` is for. This is a so-called ["higher-rank trait bound"][hrtb], and - // it enables us to say that a function is valid for *some* lifetime 'stmt that we pass into it - // at the call site. It lets Rust continue to track lifetimes even though `CreateIterFn` - // interferes by "hiding" the 'stmt lifetime from its signature. - // - // [hrtb]: https://doc.rust-lang.org/nomicon/hrtb.html - type CreateIterFn<'conn, ItemT> = Box< - dyn for<'stmt> Fn( - &'stmt mut Statement<'conn>, - ) - -> Result, LocalGenerationError>, - >; - - /// Iterator of SQL results. - pub struct SqlResults<'conn, ItemT> { - stmt: Statement<'conn>, - create_iter: CreateIterFn<'conn, ItemT>, - } - - impl<'conn, ItemT> SqlResults<'conn, ItemT> { - fn new( - conn: &'conn Connection, - statement: &str, - create_iter: CreateIterFn<'conn, ItemT>, - ) -> Result { - let stmt = conn.prepare(statement)?; - Ok(Self { stmt, create_iter }) - } - - /// Create an iterator over results. 
- pub fn iter(&'_ mut self) -> Result, LocalGenerationError> { - (self.create_iter)(&mut self.stmt) - } - } - - /// Return all file system entries in database. - pub fn files(conn: &Connection) -> Result, LocalGenerationError> { - SqlResults::new( - conn, - "SELECT * FROM files", - Box::new(|stmt| { - let iter = stmt.query_map(params![], row_to_entry)?; - let iter = iter.map(|x| match x { - Ok((fileno, json, reason)) => serde_json::from_str(&json) - .map(|entry| BackedUpFile::new(fileno, entry, &reason)) - .map_err(|e| e.into()), - Err(e) => Err(e.into()), - }); - Ok(Box::new(iter)) - }), - ) - } - - /// Return all chunk ids in database. - pub fn chunkids( - conn: &Connection, - fileno: FileId, - ) -> Result, LocalGenerationError> { - SqlResults::new( - conn, - "SELECT chunkid FROM chunks WHERE fileno = ?1", - Box::new(move |stmt| { - let iter = stmt.query_map(params![fileno], |row| row.get(0))?; - let iter = iter.map(|x| { - let fileno: String = x?; - Ok(ChunkId::from(&fileno)) - }); - Ok(Box::new(iter)) - }), - ) - } - - /// Get a file's information given its path. - pub fn get_file( - conn: &Connection, - filename: &Path, - ) -> Result, LocalGenerationError> { - match get_file_and_fileno(conn, filename)? { - None => Ok(None), - Some((_, e, _)) => Ok(Some(e)), - } - } - - /// Get a file's information given it's id in the database. - pub fn get_fileno( - conn: &Connection, - filename: &Path, - ) -> Result, LocalGenerationError> { - match get_file_and_fileno(conn, filename)? { - None => Ok(None), - Some((id, _, _)) => Ok(Some(id)), - } - } - - fn get_file_and_fileno( - conn: &Connection, - filename: &Path, - ) -> Result, LocalGenerationError> { - let mut stmt = conn.prepare("SELECT * FROM files WHERE filename = ?1")?; - let mut iter = stmt.query_map(params![path_into_blob(filename)], row_to_entry)?; - match iter.next() { - None => Ok(None), - Some(Err(e)) => { - debug!("database lookup error: {}", e); - Err(e.into()) - } - Some(Ok((fileno, json, reason))) => { - let entry = serde_json::from_str(&json)?; - if iter.next() == None { - Ok(Some((fileno, entry, reason))) - } else { - debug!("too many files in file lookup"); - Err(LocalGenerationError::TooManyFiles(filename.to_path_buf())) - } - } - } - } - - /// Does a path refer to a cache directory? 
- pub fn is_cachedir_tag( - conn: &Connection, - filename: &Path, - ) -> Result { - let mut stmt = conn.prepare("SELECT is_cachedir_tag FROM files WHERE filename = ?1")?; - let mut iter = stmt.query_map(params![path_into_blob(filename)], |row| row.get(0))?; - match iter.next() { - // File is missing, so it's definitely not a CACHEDIR.TAG - None => Ok(false), - Some(result) => Ok(result?), - } - } -} - #[cfg(test)] mod test { use super::{LocalGeneration, NascentGeneration}; @@ -681,7 +431,7 @@ mod test { use crate::{ backup_reason::Reason, backup_run::FsEntryBackupOutcome, fsentry::FilesystemEntry, }; - use std::{fs::metadata, mem::drop, path::Path}; + use std::{fs::metadata, path::Path}; // Create a `Metadata` structure to pass to other functions (we don't care about the // contents) @@ -732,7 +482,7 @@ mod test { .unwrap(); } - drop(gen); + gen.close().unwrap(); let gen = LocalGeneration::open(dbfile).unwrap(); assert!(!gen.is_cachedir_tag(nontag_path1).unwrap()); -- cgit v1.2.1 From 0ee823e00a29ca15ac2699ec588f9c1ff2c7b04d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 3 Mar 2022 18:51:38 +0200 Subject: perf: cache user and group name lookups Sponsored-by: author --- src/fsentry.rs | 35 ++++++++++++++++++----------------- src/fsiter.rs | 23 +++++++++++++++-------- src/generation.rs | 9 +++++---- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/fsentry.rs b/src/fsentry.rs index 9f40fd0..90afd70 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -8,6 +8,7 @@ use std::fs::{FileType, Metadata}; use std::os::unix::ffi::OsStringExt; use std::os::unix::fs::FileTypeExt; use std::path::{Path, PathBuf}; +use users::{Groups, Users, UsersCache}; #[cfg(target_os = "linux")] use std::os::linux::fs::MetadataExt; @@ -69,7 +70,11 @@ pub enum FsEntryError { #[allow(clippy::len_without_is_empty)] impl FilesystemEntry { /// Create an `FsEntry` from a file's metadata. - pub fn from_metadata(path: &Path, meta: &Metadata) -> Result { + pub fn from_metadata( + path: &Path, + meta: &Metadata, + cache: &mut UsersCache, + ) -> Result { let kind = FilesystemKind::from_file_type(meta.file_type()); let symlink_target = if kind == FilesystemKind::Symlink { debug!("reading symlink target for {:?}", path); @@ -82,6 +87,16 @@ impl FilesystemEntry { let uid = meta.st_uid(); let gid = meta.st_gid(); + let user: String = if let Some(user) = cache.get_user_by_uid(uid) { + user.name().to_string_lossy().to_string() + } else { + "".to_string() + }; + let group = if let Some(group) = cache.get_group_by_gid(gid) { + group.name().to_string_lossy().to_string() + } else { + "".to_string() + }; Ok(Self { path: path.to_path_buf().into_os_string().into_vec(), @@ -95,8 +110,8 @@ impl FilesystemEntry { symlink_target, uid, gid, - user: get_username(uid), - group: get_groupname(gid), + user, + group, }) } @@ -152,20 +167,6 @@ impl FilesystemEntry { } } -fn get_username(uid: u32) -> String { - match users::get_user_by_uid(uid) { - None => "".to_string(), - Some(user) => user.name().to_os_string().to_string_lossy().into_owned(), - } -} - -fn get_groupname(gid: u32) -> String { - match users::get_group_by_gid(gid) { - None => "".to_string(), - Some(group) => group.name().to_os_string().to_string_lossy().into_owned(), - } -} - /// Different types of file system entries. #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum FilesystemKind { diff --git a/src/fsiter.rs b/src/fsiter.rs index 2747cce..8b231c1 100644 --- a/src/fsiter.rs +++ b/src/fsiter.rs @@ -1,8 +1,9 @@ //! 
Iterate over directory tree. use crate::fsentry::{FilesystemEntry, FsEntryError}; -use log::{debug, warn}; +use log::warn; use std::path::{Path, PathBuf}; +use users::UsersCache; use walkdir::{DirEntry, IntoIter, WalkDir}; /// Filesystem entry along with additional info about it. @@ -56,6 +57,7 @@ impl Iterator for FsIterator { /// Cachedir-aware adaptor for WalkDir: it skips the contents of dirs that contain CACHEDIR.TAG, /// but still yields entries for the dir and the tag themselves. struct SkipCachedirs { + cache: UsersCache, iter: IntoIter, exclude_cache_tag_directories: bool, // This is the last tag we've found. `next()` will yield it before asking `iter` for more @@ -66,6 +68,7 @@ struct SkipCachedirs { impl SkipCachedirs { fn new(iter: IntoIter, exclude_cache_tag_directories: bool) -> Self { Self { + cache: UsersCache::new(), iter, exclude_cache_tag_directories, cachedir_tag: None, @@ -109,7 +112,7 @@ impl SkipCachedirs { if content == CACHEDIR_TAG { self.iter.skip_current_dir(); - self.cachedir_tag = Some(new_entry(&tag_path, true)); + self.cachedir_tag = Some(new_entry(&tag_path, true, &mut self.cache)); } } } @@ -120,22 +123,26 @@ impl Iterator for SkipCachedirs { fn next(&mut self) -> Option { self.cachedir_tag.take().or_else(|| { let next = self.iter.next(); - debug!("walkdir found: {:?}", next); + // debug!("walkdir found: {:?}", next); match next { None => None, Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))), Some(Ok(entry)) => { self.try_enqueue_cachedir_tag(&entry); - Some(new_entry(entry.path(), false)) + Some(new_entry(entry.path(), false, &mut self.cache)) } } }) } } -fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result { +fn new_entry( + path: &Path, + is_cachedir_tag: bool, + cache: &mut UsersCache, +) -> Result { let meta = std::fs::symlink_metadata(path); - debug!("metadata for {:?}: {:?}", path, meta); + // debug!("metadata for {:?}: {:?}", path, meta); let meta = match meta { Ok(meta) => meta, Err(err) => { @@ -143,8 +150,8 @@ fn new_entry(path: &Path, is_cachedir_tag: bool) -> Result Date: Thu, 3 Mar 2022 18:51:38 +0200 Subject: perf: reduce logging for, for performance Sponsored-by: author --- src/fsiter.rs | 3 --- src/policy.rs | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/fsiter.rs b/src/fsiter.rs index 8b231c1..ef2886d 100644 --- a/src/fsiter.rs +++ b/src/fsiter.rs @@ -123,7 +123,6 @@ impl Iterator for SkipCachedirs { fn next(&mut self) -> Option { self.cachedir_tag.take().or_else(|| { let next = self.iter.next(); - // debug!("walkdir found: {:?}", next); match next { None => None, Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))), @@ -142,7 +141,6 @@ fn new_entry( cache: &mut UsersCache, ) -> Result { let meta = std::fs::symlink_metadata(path); - // debug!("metadata for {:?}: {:?}", path, meta); let meta = match meta { Ok(meta) => meta, Err(err) => { @@ -151,7 +149,6 @@ fn new_entry( } }; let entry = FilesystemEntry::from_metadata(path, &meta, cache)?; - // debug!("FileSystemEntry for {:?}: {:?}", path, entry); let annotated = AnnotatedFsEntry { inner: entry, is_cachedir_tag, diff --git a/src/policy.rs b/src/policy.rs index 7241f0f..bb98a48 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -3,7 +3,7 @@ use crate::backup_reason::Reason; use crate::fsentry::FilesystemEntry; use crate::generation::LocalGeneration; -use log::{debug, warn}; +use log::warn; /// Policy for what gets backed up. 
/// @@ -59,10 +59,6 @@ impl BackupPolicy { Reason::GenerationLookupError } }; - debug!( - "needs_backup: file {:?}: policy decision: {}", - new_name, reason - ); reason } } -- cgit v1.2.1 From 62f2e1583c77e1f182740f22d3cf60ac6eaab4af Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 3 Mar 2022 19:19:39 +0200 Subject: refactor: add constant for showing/hiding progress reporting This is clearer than editing literal values in the functions. Sponsored-by: author --- src/backup_progress.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backup_progress.rs b/src/backup_progress.rs index 52430e4..f119210 100644 --- a/src/backup_progress.rs +++ b/src/backup_progress.rs @@ -4,6 +4,8 @@ use crate::generation::GenId; use indicatif::{ProgressBar, ProgressStyle}; use std::path::Path; +const SHOW_PROGRESS: bool = true; + /// A progress bar abstraction specific to backups. /// /// The progress bar is different for initial and incremental backups, @@ -15,7 +17,7 @@ pub struct BackupProgress { impl BackupProgress { /// Create a progress bar for an initial backup. pub fn initial() -> Self { - let progress = if true { + let progress = if SHOW_PROGRESS { ProgressBar::new(0) } else { ProgressBar::hidden() @@ -35,7 +37,7 @@ impl BackupProgress { /// Create a progress bar for an incremental backup. pub fn incremental() -> Self { - let progress = if true { + let progress = if SHOW_PROGRESS { ProgressBar::new(0) } else { ProgressBar::hidden() -- cgit v1.2.1 From 99d516f46bce4048633b85ff2ce0c2446673208e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 4 Mar 2022 08:15:21 +0200 Subject: test: show cargo, rustc versions in ./check -v Sponsored-by: author --- check | 3 +++ 1 file changed, 3 insertions(+) diff --git a/check b/check index 2da9a65..905bc5b 100755 --- a/check +++ b/check @@ -42,6 +42,9 @@ require_cmd pdflatex # daemonize installation location changed from Debian 10 to 11. require_cmd daemonize || require_cmd /usr/sbin/daemonize +$hideok cargo --version +$hideok rustc --version + got_cargo_cmd clippy && cargo clippy --all-targets -q $hideok cargo build --all-targets got_cargo_cmd fmt && $hideok cargo fmt -- --check -- cgit v1.2.1
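
Taken together, the series layers the database code in three levels:
Database (src/db.rs, a generic SQLite wrapper), GenerationDb
(src/dbgen.rs, the backup schema), and NascentGeneration /
LocalGeneration (src/generation.rs, the API the rest of the client
uses). A sketch of the resulting call flow, using only functions shown
in the patches above (the driver function itself is hypothetical):

    use std::error::Error;
    use std::path::Path;

    use obnam::generation::{LocalGeneration, NascentGeneration};

    fn backup_flow(old: &Path, new: &Path) -> Result<(), Box<dyn Error>> {
        // NascentGeneration::create() -> GenerationDb::create()
        //   -> Database::create(); fails if `new` already exists.
        let nascent = NascentGeneration::create(new)?;
        // ... insert() one row per backed-up file here ...
        nascent.close()?; // commits and creates the indexes

        // LocalGeneration::open() -> GenerationDb::open()
        //   -> Database::open(); read-only from here on.
        let previous = LocalGeneration::open(old)?;
        println!("{} files in previous generation", previous.file_count()?);
        Ok(())
    }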