From a2bee6568dee4c23ed008e657f38352da4190f24 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sun, 20 Mar 2022 07:52:38 +0200
Subject: feat: allow user to choose backup schema version for new backups

The way this is currently implemented results in a lot of code
duplication in src/generation.rs. This should be refactored later. My
first attempt to do it, by adding a trait for a schema variant, failed.

Sponsored-by: author
---
 src/dbgen.rs | 507 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 462 insertions(+), 45 deletions(-)

(limited to 'src/dbgen.rs')

diff --git a/src/dbgen.rs b/src/dbgen.rs
index 7e54d7d..816ea11 100644
--- a/src/dbgen.rs
+++ b/src/dbgen.rs
@@ -4,16 +4,28 @@ use crate::backup_reason::Reason;
 use crate::chunkid::ChunkId;
 use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value};
 use crate::fsentry::FilesystemEntry;
+use crate::genmeta::{GenerationMeta, GenerationMetaError};
+use crate::schema::{SchemaVersion, VersionComponent};
 use log::error;
 use std::collections::HashMap;
 use std::os::unix::ffi::OsStrExt;
 use std::path::{Path, PathBuf};

-/// Current generation database schema major version.
-pub const SCHEMA_MAJOR: u32 = 0;
+/// Return the latest supported schema version for a supported major
+/// version.
+pub fn schema_version(major: VersionComponent) -> Result<SchemaVersion, GenerationDbError> {
+    match major {
+        0 => Ok(SchemaVersion::new(0, 0)),
+        1 => Ok(SchemaVersion::new(1, 0)),
+        _ => Err(GenerationDbError::Unsupported(major)),
+    }
+}
+
+/// Default database schema major version.
+pub const DEFAULT_SCHEMA_MAJOR: VersionComponent = V0_0::MAJOR;

-/// Current generation database schema minor version.
-pub const SCHEMA_MINOR: u32 = 0;
+/// Major schema versions supported by this version of Obnam.
+pub const SCHEMA_MAJORS: &[VersionComponent] = &[0, 1];

 /// An identifier for a file in a generation.
 pub type FileId = u64;
@@ -37,15 +49,23 @@ pub enum GenerationDbError {
     #[error("Generation 'meta' row {0} has badly formed integer: {1}")]
     BadMetaInteger(String, std::num::ParseIntError),

+    /// A major schema version is unsupported.
+    #[error("Unsupported backup schema major version: {0}")]
+    Unsupported(VersionComponent),
+
     /// Local generation uses a schema version that this version of
     /// Obnam isn't compatible with.
     #[error("Backup is not compatible with this version of Obnam: {0}.{1}")]
-    Incompatible(u32, u32),
+    Incompatible(VersionComponent, VersionComponent),

     /// Error from a database
     #[error(transparent)]
     Database(#[from] DatabaseError),

+    /// Error from generation metadata.
+    #[error(transparent)]
+    GenerationMeta(#[from] GenerationMetaError),
+
     /// Error from JSON.
     #[error(transparent)]
     SerdeJsonError(#[from] serde_json::Error),
@@ -57,6 +77,157 @@ pub enum GenerationDbError {

 /// A database representing a backup generation.
 pub struct GenerationDb {
+    variant: GenerationDbVariant,
+}
+
+enum GenerationDbVariant {
+    V0_0(V0_0),
+    V1_0(V1_0),
+}
+
+impl GenerationDb {
+    /// Create a new generation database in read/write mode.
+    pub fn create<P: AsRef<Path>>(
+        filename: P,
+        schema: SchemaVersion,
+    ) -> Result<Self, GenerationDbError> {
+        let meta_table = Self::meta_table();
+        let variant = match schema.version() {
+            (V0_0::MAJOR, V0_0::MINOR) => {
+                GenerationDbVariant::V0_0(V0_0::create(filename, meta_table)?)
+            }
+            (V1_0::MAJOR, V1_0::MINOR) => {
+                GenerationDbVariant::V1_0(V1_0::create(filename, meta_table)?)
+            }
+            (major, minor) => return Err(GenerationDbError::Incompatible(major, minor)),
+        };
+        Ok(Self { variant })
+    }
+
+    /// Open an existing generation database in read-only mode.
+    pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, GenerationDbError> {
+        let filename = filename.as_ref();
+        let meta_table = Self::meta_table();
+        let schema = {
+            let plain_db = Database::open(filename)?;
+            let rows = Self::meta_rows(&plain_db, &meta_table)?;
+            GenerationMeta::from(rows)?.schema_version()
+        };
+        let variant = match schema.version() {
+            (V0_0::MAJOR, V0_0::MINOR) => {
+                GenerationDbVariant::V0_0(V0_0::open(filename, meta_table)?)
+            }
+            (V1_0::MAJOR, V1_0::MINOR) => {
+                GenerationDbVariant::V1_0(V1_0::open(filename, meta_table)?)
+            }
+            (major, minor) => return Err(GenerationDbError::Incompatible(major, minor)),
+        };
+        Ok(Self { variant })
+    }
+
+    fn meta_table() -> Table {
+        Table::new("meta")
+            .column(Column::text("key"))
+            .column(Column::text("value"))
+            .build()
+    }
+
+    fn meta_rows(
+        db: &Database,
+        table: &Table,
+    ) -> Result<HashMap<String, String>, GenerationDbError> {
+        let mut map = HashMap::new();
+        let mut iter = db.all_rows(table, &row_to_kv)?;
+        for kv in iter.iter()? {
+            let (key, value) = kv?;
+            map.insert(key, value);
+        }
+        Ok(map)
+    }
+
+    /// Close a database, commit any changes.
+    pub fn close(self) -> Result<(), GenerationDbError> {
+        match self.variant {
+            GenerationDbVariant::V0_0(v) => v.close(),
+            GenerationDbVariant::V1_0(v) => v.close(),
+        }
+    }
+
+    /// Return contents of "meta" table as a HashMap.
+    pub fn meta(&self) -> Result<HashMap<String, String>, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.meta(),
+            GenerationDbVariant::V1_0(v) => v.meta(),
+        }
+    }
+
+    /// Insert a file system entry into the database.
+    pub fn insert(
+        &mut self,
+        e: FilesystemEntry,
+        fileid: FileId,
+        ids: &[ChunkId],
+        reason: Reason,
+        is_cachedir_tag: bool,
+    ) -> Result<(), GenerationDbError> {
+        match &mut self.variant {
+            GenerationDbVariant::V0_0(v) => v.insert(e, fileid, ids, reason, is_cachedir_tag),
+            GenerationDbVariant::V1_0(v) => v.insert(e, fileid, ids, reason, is_cachedir_tag),
+        }
+    }
+
+    /// Count the number of file system entries.
+    pub fn file_count(&self) -> Result<FileId, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.file_count(),
+            GenerationDbVariant::V1_0(v) => v.file_count(),
+        }
+    }
+
+    /// Does a path refer to a cache directory?
+    pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.is_cachedir_tag(filename),
+            GenerationDbVariant::V1_0(v) => v.is_cachedir_tag(filename),
+        }
+    }
+
+    /// Return all chunk ids for a file in the database.
+    pub fn chunkids(&self, fileid: FileId) -> Result<SqlResults<ChunkId>, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.chunkids(fileid),
+            GenerationDbVariant::V1_0(v) => v.chunkids(fileid),
+        }
+    }
+
+    /// Return all file descriptions in the database.
+    pub fn files(
+        &self,
+    ) -> Result<SqlResults<(FileId, FilesystemEntry, Reason, bool)>, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.files(),
+            GenerationDbVariant::V1_0(v) => v.files(),
+        }
+    }
+
+    /// Get a file's information given its path.
+    pub fn get_file(&self, filename: &Path) -> Result<Option<FilesystemEntry>, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.get_file(filename),
+            GenerationDbVariant::V1_0(v) => v.get_file(filename),
+        }
+    }
+
+    /// Get a file's id in the database given its path.
+    pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, GenerationDbError> {
+        match &self.variant {
+            GenerationDbVariant::V0_0(v) => v.get_fileno(filename),
+            GenerationDbVariant::V1_0(v) => v.get_fileno(filename),
+        }
+    }
+}
+
+struct V0_0 {
     created: bool,
     db: Database,
     meta: Table,
@@ -64,36 +235,35 @@ pub struct GenerationDb {
     chunks: Table,
 }

-impl GenerationDb {
+impl V0_0 {
+    const MAJOR: VersionComponent = 0;
+    const MINOR: VersionComponent = 0;
+
     /// Create a new generation database in read/write mode.
-    pub fn create<P: AsRef<Path>>(filename: P) -> Result<Self, GenerationDbError> {
+    pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
         let db = Database::create(filename.as_ref())?;
-        let mut moi = Self::new(db);
+        let mut moi = Self::new(db, meta);
         moi.created = true;
         moi.create_tables()?;
         Ok(moi)
     }

     /// Open an existing generation database in read-only mode.
-    pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, GenerationDbError> {
+    pub fn open<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
         let db = Database::open(filename.as_ref())?;
-        Ok(Self::new(db))
+        Ok(Self::new(db, meta))
     }

-    fn new(db: Database) -> Self {
-        let meta = Table::new("meta")
-            .column(Column::text("key"))
-            .column(Column::text("value"))
-            .build();
+    fn new(db: Database, meta: Table) -> Self {
         let files = Table::new("files")
-            .column(Column::primary_key("fileno")) // FIXME: rename to fileid
+            .column(Column::primary_key("fileno"))
             .column(Column::blob("filename"))
             .column(Column::text("json"))
             .column(Column::text("reason"))
             .column(Column::bool("is_cachedir_tag"))
             .build();
         let chunks = Table::new("chunks")
-            .column(Column::int("fileno")) // FIXME: rename to fileid
+            .column(Column::int("fileno"))
             .column(Column::text("chunkid"))
             .build();
@@ -115,14 +285,14 @@ impl GenerationDb {
             &self.meta,
             &[
                 Value::text("key", "schema_version_major"),
-                Value::text("value", &format!("{}", SCHEMA_MAJOR)),
+                Value::text("value", &format!("{}", Self::MAJOR)),
             ],
         )?;
         self.db.insert(
             &self.meta,
             &[
                 Value::text("key", "schema_version_minor"),
-                Value::text("value", &format!("{}", SCHEMA_MINOR)),
+                Value::text("value", &format!("{}", Self::MINOR)),
             ],
         )?;

@@ -187,7 +357,7 @@ impl GenerationDb {
         // FIXME: this needs to be done using "SELECT count(*) FROM
         // files", but the Database abstraction doesn't support that
         // yet.
-        let mut iter = self.db.all_rows(&self.files, &row_to_entry)?;
+        let mut iter = self.db.all_rows(&self.files, &Self::row_to_entry)?;
         let mut count = 0;
         for _ in iter.iter()? {
             count += 1;
@@ -199,7 +369,9 @@ impl GenerationDb {
     pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, GenerationDbError> {
         let filename_vec = path_into_blob(filename);
         let value = Value::blob("filename", &filename_vec);
-        let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?;
+        let mut rows = self
+            .db
+            .some_rows(&self.files, &value, &Self::row_to_entry)?;
         let mut iter = rows.iter()?;

         if let Some(row) = iter.next() {
@@ -228,7 +400,7 @@ impl GenerationDb {
     pub fn files(
         &self,
     ) -> Result<SqlResults<(FileId, FilesystemEntry, Reason, bool)>, GenerationDbError> {
-        Ok(self.db.all_rows(&self.files, &row_to_fsentry)?)
+        Ok(self.db.all_rows(&self.files, &Self::row_to_fsentry)?)
     }

     /// Get a file's information given its path.
@@ -253,7 +425,9 @@ impl GenerationDb {
     ) -> Result<Option<(FileId, FilesystemEntry, String)>, GenerationDbError> {
         let filename_bytes = path_into_blob(filename);
         let value = Value::blob("filename", &filename_bytes);
-        let mut rows = self.db.some_rows(&self.files, &value, &row_to_entry)?;
+        let mut rows = self
+            .db
+            .some_rows(&self.files, &value, &Self::row_to_entry)?;
         let mut iter = rows.iter()?;

         if let Some(row) = iter.next() {
@@ -272,6 +446,271 @@ impl GenerationDb {
             Ok(None)
         }
     }
+
+    fn row_to_entry(row: &rusqlite::Row) -> rusqlite::Result<(FileId, String, String, bool)> {
+        let fileno: FileId = row.get("fileno")?;
+        let json: String = row.get("json")?;
+        let reason: String = row.get("reason")?;
+        let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+        Ok((fileno, json, reason, is_cachedir_tag))
+    }
+
+    fn row_to_fsentry(
+        row: &rusqlite::Row,
+    ) -> rusqlite::Result<(FileId, FilesystemEntry, Reason, bool)> {
+        let fileno: FileId = row.get("fileno")?;
+        let json: String = row.get("json")?;
+        let entry = serde_json::from_str(&json).map_err(|err| {
+            rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(err))
+        })?;
+        let reason: String = row.get("reason")?;
+        let reason = Reason::from(&reason);
+        let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+        Ok((fileno, entry, reason, is_cachedir_tag))
+    }
+}
+
+struct V1_0 {
+    created: bool,
+    db: Database,
+    meta: Table,
+    files: Table,
+    chunks: Table,
+}
+
+impl V1_0 {
+    const MAJOR: VersionComponent = 1;
+    const MINOR: VersionComponent = 0;
+
+    /// Create a new generation database in read/write mode.
+    pub fn create<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
+        let db = Database::create(filename.as_ref())?;
+        let mut moi = Self::new(db, meta);
+        moi.created = true;
+        moi.create_tables()?;
+        Ok(moi)
+    }
+
+    /// Open an existing generation database in read-only mode.
+    pub fn open<P: AsRef<Path>>(filename: P, meta: Table) -> Result<Self, GenerationDbError> {
+        let db = Database::open(filename.as_ref())?;
+        Ok(Self::new(db, meta))
+    }
+
+    fn new(db: Database, meta: Table) -> Self {
+        let files = Table::new("files")
+            .column(Column::primary_key("fileid"))
+            .column(Column::blob("filename"))
+            .column(Column::text("json"))
+            .column(Column::text("reason"))
+            .column(Column::bool("is_cachedir_tag"))
+            .build();
+        let chunks = Table::new("chunks")
+            .column(Column::int("fileid"))
+            .column(Column::text("chunkid"))
+            .build();
+
+        Self {
+            created: false,
+            db,
+            meta,
+            files,
+            chunks,
+        }
+    }
+
+    fn create_tables(&mut self) -> Result<(), GenerationDbError> {
+        self.db.create_table(&self.meta)?;
+        self.db.create_table(&self.files)?;
+        self.db.create_table(&self.chunks)?;
+
+        self.db.insert(
+            &self.meta,
+            &[
+                Value::text("key", "schema_version_major"),
+                Value::text("value", &format!("{}", Self::MAJOR)),
+            ],
+        )?;
+        self.db.insert(
+            &self.meta,
+            &[
+                Value::text("key", "schema_version_minor"),
+                Value::text("value", &format!("{}", Self::MINOR)),
+            ],
+        )?;
+
+        Ok(())
+    }
+
+    /// Close a database, commit any changes.
+    pub fn close(self) -> Result<(), GenerationDbError> {
+        if self.created {
+            self.db
+                .create_index("filenames_idx", &self.files, "filename")?;
+            self.db.create_index("fileid_idx", &self.chunks, "fileid")?;
+        }
+        self.db.close().map_err(GenerationDbError::Database)
+    }
+
+    /// Return contents of "meta" table as a HashMap.
+    pub fn meta(&self) -> Result<HashMap<String, String>, GenerationDbError> {
+        let mut map = HashMap::new();
+        let mut iter = self.db.all_rows(&self.meta, &row_to_kv)?;
+        for kv in iter.iter()? {
+            let (key, value) = kv?;
+            map.insert(key, value);
+        }
+        Ok(map)
+    }
+
+    /// Insert a file system entry into the database.
+    pub fn insert(
+        &mut self,
+        e: FilesystemEntry,
+        fileid: FileId,
+        ids: &[ChunkId],
+        reason: Reason,
+        is_cachedir_tag: bool,
+    ) -> Result<(), GenerationDbError> {
+        let json = serde_json::to_string(&e)?;
+        self.db.insert(
+            &self.files,
+            &[
+                Value::primary_key("fileid", fileid),
+                Value::blob("filename", &path_into_blob(&e.pathbuf())),
+                Value::text("json", &json),
+                Value::text("reason", &format!("{}", reason)),
+                Value::bool("is_cachedir_tag", is_cachedir_tag),
+            ],
+        )?;
+        for id in ids {
+            self.db.insert(
+                &self.chunks,
+                &[
+                    Value::int("fileid", fileid),
+                    Value::text("chunkid", &format!("{}", id)),
+                ],
+            )?;
+        }
+        Ok(())
+    }
+
+    /// Count the number of file system entries.
+    pub fn file_count(&self) -> Result<FileId, GenerationDbError> {
+        // FIXME: this needs to be done using "SELECT count(*) FROM
+        // files", but the Database abstraction doesn't support that
+        // yet.
+        let mut iter = self.db.all_rows(&self.files, &Self::row_to_entry)?;
+        let mut count = 0;
+        for _ in iter.iter()? {
+            count += 1;
+        }
+        Ok(count)
+    }
+
+    /// Does a path refer to a cache directory?
+    pub fn is_cachedir_tag(&self, filename: &Path) -> Result<bool, GenerationDbError> {
+        let filename_vec = path_into_blob(filename);
+        let value = Value::blob("filename", &filename_vec);
+        let mut rows = self
+            .db
+            .some_rows(&self.files, &value, &Self::row_to_entry)?;
+        let mut iter = rows.iter()?;
+
+        if let Some(row) = iter.next() {
+            // Make sure there's only one row for a given filename. A
+            // bug in a previous version, or a maliciously constructed
+            // generation, could result in there being more than one.
+            if iter.next().is_some() {
+                error!("too many files in file lookup");
+                Err(GenerationDbError::TooManyFiles(filename.to_path_buf()))
+            } else {
+                let (_, _, _, is_cachedir_tag) = row?;
+                Ok(is_cachedir_tag)
+            }
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Return all chunk ids for a file in the database.
+    pub fn chunkids(&self, fileid: FileId) -> Result<SqlResults<ChunkId>, GenerationDbError> {
+        let fileid = Value::int("fileid", fileid);
+        Ok(self.db.some_rows(&self.chunks, &fileid, &row_to_chunkid)?)
+    }
+
+    /// Return all file descriptions in the database.
+    pub fn files(
+        &self,
+    ) -> Result<SqlResults<(FileId, FilesystemEntry, Reason, bool)>, GenerationDbError> {
+        Ok(self.db.all_rows(&self.files, &Self::row_to_fsentry)?)
+    }
+
+    /// Get a file's information given its path.
+    pub fn get_file(&self, filename: &Path) -> Result<Option<FilesystemEntry>, GenerationDbError> {
+        match self.get_file_and_fileno(filename)? {
+            None => Ok(None),
+            Some((_, e, _)) => Ok(Some(e)),
+        }
+    }
+
+    /// Get a file's id in the database given its path.
+    pub fn get_fileno(&self, filename: &Path) -> Result<Option<FileId>, GenerationDbError> {
+        match self.get_file_and_fileno(filename)? {
+            None => Ok(None),
+            Some((id, _, _)) => Ok(Some(id)),
+        }
+    }
+
+    fn get_file_and_fileno(
+        &self,
+        filename: &Path,
+    ) -> Result<Option<(FileId, FilesystemEntry, String)>, GenerationDbError> {
+        let filename_bytes = path_into_blob(filename);
+        let value = Value::blob("filename", &filename_bytes);
+        let mut rows = self
+            .db
+            .some_rows(&self.files, &value, &Self::row_to_entry)?;
+        let mut iter = rows.iter()?;
+
+        if let Some(row) = iter.next() {
+            // Make sure there's only one row for a given filename. A
+            // bug in a previous version, or a maliciously constructed
+            // generation, could result in there being more than one.
+            if iter.next().is_some() {
+                error!("too many files in file lookup");
+                Err(GenerationDbError::TooManyFiles(filename.to_path_buf()))
+            } else {
+                let (fileid, ref json, ref reason, _) = row?;
+                let entry = serde_json::from_str(json)?;
+                Ok(Some((fileid, entry, reason.to_string())))
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn row_to_entry(row: &rusqlite::Row) -> rusqlite::Result<(FileId, String, String, bool)> {
+        let fileno: FileId = row.get("fileid")?;
+        let json: String = row.get("json")?;
+        let reason: String = row.get("reason")?;
+        let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+        Ok((fileno, json, reason, is_cachedir_tag))
+    }
+
+    fn row_to_fsentry(
+        row: &rusqlite::Row,
+    ) -> rusqlite::Result<(FileId, FilesystemEntry, Reason, bool)> {
+        let fileno: FileId = row.get("fileid")?;
+        let json: String = row.get("json")?;
+        let entry = serde_json::from_str(&json).map_err(|err| {
+            rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(err))
+        })?;
+        let reason: String = row.get("reason")?;
+        let reason = Reason::from(&reason);
+        let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
+        Ok((fileno, entry, reason, is_cachedir_tag))
+    }
 }

 fn row_to_kv(row: &rusqlite::Row) -> rusqlite::Result<(String, String)> {
@@ -284,28 +723,6 @@ fn path_into_blob(path: &Path) -> Vec<u8> {
     path.as_os_str().as_bytes().to_vec()
 }

-fn row_to_entry(row: &rusqlite::Row) -> rusqlite::Result<(FileId, String, String, bool)> {
-    let fileno: FileId = row.get("fileno")?;
-    let json: String = row.get("json")?;
-    let reason: String = row.get("reason")?;
-    let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
-    Ok((fileno, json, reason, is_cachedir_tag))
-}
-
-fn row_to_fsentry(
-    row: &rusqlite::Row,
-) -> rusqlite::Result<(FileId, FilesystemEntry, Reason, bool)> {
-    let fileno: FileId = row.get("fileno")?;
-    let json: String = row.get("json")?;
-    let entry = serde_json::from_str(&json).map_err(|err| {
-        rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(err))
-    })?;
-    let reason: String = row.get("reason")?;
-    let reason = Reason::from(&reason);
-    let is_cachedir_tag: bool = row.get("is_cachedir_tag")?;
-    Ok((fileno, entry, reason, is_cachedir_tag))
-}
-
 fn row_to_chunkid(row: &rusqlite::Row) -> rusqlite::Result<ChunkId> {
     let chunkid: String = row.get("chunkid")?;
     let chunkid = ChunkId::recreate(&chunkid);
--
cgit v1.2.1
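
Below is a minimal usage sketch of the API added above; it is not part
of the patch. It assumes the items defined in src/dbgen.rs
(schema_version, DEFAULT_SCHEMA_MAJOR, GenerationDb, GenerationDbError,
VersionComponent) and a hypothetical caller that lets the user pick a
major schema version, for example from a command-line option.

    use std::path::Path;

    // Hypothetical helper: map the user's requested major version (or
    // the default) to the newest supported full version, then create a
    // generation database with it.
    fn create_generation(
        filename: &Path,
        requested_major: Option<VersionComponent>,
    ) -> Result<GenerationDb, GenerationDbError> {
        // Unknown major versions are rejected as GenerationDbError::Unsupported.
        let schema = schema_version(requested_major.unwrap_or(DEFAULT_SCHEMA_MAJOR))?;
        // GenerationDb::create selects the matching variant (V0_0 or V1_0)
        // and records the chosen version in the new database's "meta" table.
        GenerationDb::create(filename, schema)
    }

Dispatching through a private enum, as the patch does, keeps the
per-version types hidden and needs no object-safe trait; the cost is
the match-arm duplication that the commit message says should be
refactored later.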
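A second sketch, under the same assumptions, of the version round trip
that open() relies on: create_tables() stores schema_version_major and
schema_version_minor rows in the "meta" table, and open() reads them
back to select the variant.

    // Hypothetical round-trip check: create a v1.0 generation, reopen
    // it, and confirm the "meta" rows that open() used to pick V1_0.
    fn roundtrip(path: &std::path::Path) -> Result<(), GenerationDbError> {
        let db = GenerationDb::create(path, schema_version(1)?)?;
        db.close()?;

        let db = GenerationDb::open(path)?; // reads "meta", matches (1, 0)
        let meta = db.meta()?;
        assert_eq!(meta.get("schema_version_major").map(String::as_str), Some("1"));
        assert_eq!(meta.get("schema_version_minor").map(String::as_str), Some("0"));
        db.close()
    }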