From aad50a0827b5c74229153395f01e0eafdd64edf6 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 3 May 2022 09:38:34 +0300 Subject: refactor: add a type for plain integers we store in a database This will make it easier to change later, if need be. We may want to do that for various reasons, such as to save space. We may also want to change things to only use integer types that SQLite can handle: u64 is currently not well handled by our database layer. However, as this is a refactor, there's no change or fix to that. FileId is now explicitly a database integer. This doesn't break anything, for now, as the underlying integer type is still u64. Also, change a couple of places where it will matter if DbInt changes away from u64, and disable warnings for harmless conversions that may cause warnings depending on what type DbInt has. Sponsored-by: author --- src/cmd/restore.rs | 2 +- src/cmd/show_gen.rs | 5 +++-- src/db.rs | 27 ++++++++++++++++----------- src/dbgen.rs | 6 +++--- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs index 4a637da..223d481 100644 --- a/src/cmd/restore.rs +++ b/src/cmd/restore.rs @@ -297,7 +297,7 @@ fn path_to_cstring(path: &Path) -> CString { fn create_progress_bar(file_count: FileId, verbose: bool) -> ProgressBar { let progress = if verbose { - ProgressBar::new(file_count) + ProgressBar::new(file_count as u64) } else { ProgressBar::hidden() }; diff --git a/src/cmd/show_gen.rs b/src/cmd/show_gen.rs index 98c57fc..f47a07b 100644 --- a/src/cmd/show_gen.rs +++ b/src/cmd/show_gen.rs @@ -3,6 +3,7 @@ use crate::chunk::ClientTrust; use crate::client::BackupClient; use crate::config::ClientConfig; +use crate::db::DbInt; use crate::error::ObnamError; use crate::fsentry::FilesystemKind; use crate::generation::GenId; @@ -66,7 +67,7 @@ impl ShowGeneration { #[derive(Debug, Default, Serialize)] struct Output { generation_id: String, - file_count: u64, + file_count: DbInt, file_bytes: String, file_bytes_raw: u64, db_bytes: String, @@ -81,7 +82,7 @@ impl Output { } } - fn file_count(mut self, n: u64) -> Self { + fn file_count(mut self, n: DbInt) -> Self { self.file_count = n; self } diff --git a/src/db.rs b/src/db.rs index b4de399..ab638a9 100644 --- a/src/db.rs +++ b/src/db.rs @@ -353,13 +353,16 @@ impl Column { } } +/// Type of plain integers that can be stored. +pub type DbInt = u64; + /// A value in a named column. #[derive(Debug)] pub enum Value<'a> { /// An integer primary key. - PrimaryKey(&'a str, u64), + PrimaryKey(&'a str, DbInt), /// An integer. - Int(&'a str, u64), + Int(&'a str, DbInt), /// A text string. Text(&'a str, &'a str), /// A binary string. @@ -381,12 +384,12 @@ impl<'a> Value<'a> { } /// Create an integer primary key value. - pub fn primary_key(name: &'a str, value: u64) -> Self { + pub fn primary_key(name: &'a str, value: DbInt) -> Self { Self::PrimaryKey(name, value) } /// Create an integer value. - pub fn int(name: &'a str, value: u64) -> Self { + pub fn int(name: &'a str, value: DbInt) -> Self { Self::Int(name, value) } @@ -406,6 +409,7 @@ impl<'a> Value<'a> { } } +#[allow(clippy::useless_conversion)] impl<'a> ToSql for Value<'a> { // The trait defines to_sql to return a Result. However, for our // particular case, to_sql can't ever fail. We only store values @@ -438,9 +442,9 @@ impl<'a> ToSql for Value<'a> { /// Like a Value, but owns the data. pub enum OwnedValue { /// An integer primary key. - PrimaryKey(String, u64), + PrimaryKey(String, DbInt), /// An integer. - Int(String, u64), + Int(String, DbInt), /// A text string. Text(String, String), /// A binary string. @@ -462,6 +466,7 @@ impl From<&Value<'_>> for OwnedValue { } impl ToSql for OwnedValue { + #[allow(clippy::useless_conversion)] fn to_sql(&self) -> rusqlite::Result { use rusqlite::types::Value; let v = match self { @@ -547,7 +552,7 @@ mod test { use std::path::Path; use tempfile::tempdir; - fn get_bar(row: &rusqlite::Row) -> Result { + fn get_bar(row: &rusqlite::Row) -> Result { row.get("bar") } @@ -566,12 +571,12 @@ mod test { Database::open(file).unwrap() } - fn insert(db: &mut Database, value: u64) { + fn insert(db: &mut Database, value: DbInt) { let table = table(); db.insert(&table, &[Value::int("bar", value)]).unwrap(); } - fn values(db: Database) -> Vec { + fn values(db: Database) -> Vec { let table = table(); let mut rows = db.all_rows(&table, &get_bar).unwrap(); let iter = rows.iter().unwrap(); @@ -606,7 +611,7 @@ mod test { #[test] fn inserts_many_rows() { - const N: u64 = 1000; + const N: DbInt = 1000; let tmp = tempdir().unwrap(); let filename = tmp.path().join("test.db"); @@ -618,7 +623,7 @@ mod test { let db = open_db(&filename); let values = values(db); - assert_eq!(values.len() as u64, N); + assert_eq!(values.len() as DbInt, N); let mut expected = vec![]; for i in 0..N { diff --git a/src/dbgen.rs b/src/dbgen.rs index 8e5ece5..0053d4a 100644 --- a/src/dbgen.rs +++ b/src/dbgen.rs @@ -2,7 +2,7 @@ use crate::backup_reason::Reason; use crate::chunkid::ChunkId; -use crate::db::{Column, Database, DatabaseError, SqlResults, Table, Value}; +use crate::db::{Column, Database, DatabaseError, DbInt, SqlResults, Table, Value}; use crate::fsentry::FilesystemEntry; use crate::genmeta::{GenerationMeta, GenerationMetaError}; use crate::label::LabelChecksumKind; @@ -28,8 +28,8 @@ pub const DEFAULT_SCHEMA_MAJOR: VersionComponent = V0_0::MAJOR; /// Major schema versions supported by this version of Obnam. pub const SCHEMA_MAJORS: &[VersionComponent] = &[0, 1]; -/// An identifier for a file in a generation. -pub type FileId = u64; +/// An integer identifier for a file in a generation. +pub type FileId = DbInt; /// Possible errors from using generation databases. #[derive(Debug, thiserror::Error)] -- cgit v1.2.1 From 856d32c448a87245234315462d2190c5a9aab549 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 3 May 2022 10:05:28 +0300 Subject: feat! only store signed 64-bit plain integers in database This is a breaking change, but allows to store the largest signed 64-bit integers in SQLite and get it back. Sponsored-by: author --- src/db.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/db.rs b/src/db.rs index ab638a9..46ba16a 100644 --- a/src/db.rs +++ b/src/db.rs @@ -354,7 +354,7 @@ impl Column { } /// Type of plain integers that can be stored. -pub type DbInt = u64; +pub type DbInt = i64; /// A value in a named column. #[derive(Debug)] @@ -631,4 +631,17 @@ mod test { } assert_eq!(values, expected); } + + #[test] + fn round_trips_int_max() { + let tmp = tempdir().unwrap(); + let filename = tmp.path().join("test.db"); + let mut db = create_db(&filename); + insert(&mut db, DbInt::MAX); + db.close().unwrap(); + + let db = open_db(&filename); + let values = values(db); + assert_eq!(values, vec![DbInt::MAX]); + } } -- cgit v1.2.1 From 3d50e1497dd07929655636cfea0c48ccadd3ab8e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 3 May 2022 10:44:26 +0300 Subject: test: add test for storing, retrieving u64::MAX values in JSON The test passes. We create a FilesystemEntry with a length field containing u64::MAX, store that into a generation, and read it back. This works. The entry is serialised into JSON for storing in SQLite, and this proves we can handle any u64 value in an entry. serde_json deals with it fine, and we don't need to worry about it. Sponsored-by: author --- src/db.rs | 1 - src/fsentry.rs | 51 +++++++++++++++++++++++++++++++++++++++------------ src/generation.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 77 insertions(+), 15 deletions(-) diff --git a/src/db.rs b/src/db.rs index 46ba16a..da24e96 100644 --- a/src/db.rs +++ b/src/db.rs @@ -631,7 +631,6 @@ mod test { } assert_eq!(values, expected); } - #[test] fn round_trips_int_max() { let tmp = tempdir().unwrap(); diff --git a/src/fsentry.rs b/src/fsentry.rs index 90afd70..a04a3de 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -70,12 +70,19 @@ pub enum FsEntryError { #[allow(clippy::len_without_is_empty)] impl FilesystemEntry { /// Create an `FsEntry` from a file's metadata. - pub fn from_metadata( + pub(crate) fn new( path: &Path, - meta: &Metadata, + kind: FilesystemKind, + uid: u32, + gid: u32, + len: u64, + mode: u32, + mtime: i64, + mtime_ns: i64, + atime: i64, + atime_ns: i64, cache: &mut UsersCache, ) -> Result { - let kind = FilesystemKind::from_file_type(meta.file_type()); let symlink_target = if kind == FilesystemKind::Symlink { debug!("reading symlink target for {:?}", path); let target = @@ -85,8 +92,6 @@ impl FilesystemEntry { None }; - let uid = meta.st_uid(); - let gid = meta.st_gid(); let user: String = if let Some(user) = cache.get_user_by_uid(uid) { user.name().to_string_lossy().to_string() } else { @@ -100,13 +105,13 @@ impl FilesystemEntry { Ok(Self { path: path.to_path_buf().into_os_string().into_vec(), - kind: FilesystemKind::from_file_type(meta.file_type()), - len: meta.len(), - mode: meta.st_mode(), - mtime: meta.st_mtime(), - mtime_ns: meta.st_mtime_nsec(), - atime: meta.st_atime(), - atime_ns: meta.st_atime_nsec(), + kind, + len, + mode, + mtime, + mtime_ns, + atime, + atime_ns, symlink_target, uid, gid, @@ -115,6 +120,28 @@ impl FilesystemEntry { }) } + /// Create an `FsEntry` from a file's metadata. + pub fn from_metadata( + path: &Path, + meta: &Metadata, + cache: &mut UsersCache, + ) -> Result { + let kind = FilesystemKind::from_file_type(meta.file_type()); + Self::new( + path, + kind, + meta.st_uid(), + meta.st_gid(), + meta.len(), + meta.st_mode(), + meta.st_mtime(), + meta.st_mtime_nsec(), + meta.st_atime(), + meta.st_atime_nsec(), + cache, + ) + } + /// Return the kind of file the entry refers to. pub fn kind(&self) -> FilesystemKind { self.kind diff --git a/src/generation.rs b/src/generation.rs index 180efbe..cec3d14 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -296,8 +296,44 @@ impl LocalGeneration { #[cfg(test)] mod test { - use super::{LabelChecksumKind, LocalGeneration, NascentGeneration, SchemaVersion}; - use tempfile::NamedTempFile; + use super::{LabelChecksumKind, LocalGeneration, NascentGeneration, Reason, SchemaVersion}; + use crate::fsentry::FilesystemEntry; + use crate::fsentry::FilesystemKind; + use std::path::Path; + use tempfile::{tempdir, NamedTempFile}; + use users::UsersCache; + + #[test] + fn round_trips_u64_max() { + let tmp = tempdir().unwrap(); + let filename = tmp.path().join("test.db"); + let mut cache = UsersCache::new(); + let schema = SchemaVersion::new(0, 0); + { + let e = FilesystemEntry::new( + Path::new("/"), + FilesystemKind::Directory, + 0, + 0, + u64::MAX, + 0, + 0, + 0, + 0, + 0, + &mut cache, + ) + .unwrap(); + let mut gen = + NascentGeneration::create(&filename, schema, LabelChecksumKind::Sha256).unwrap(); + gen.insert(e, &[], Reason::IsNew, false).unwrap(); + gen.close().unwrap(); + } + + let db = LocalGeneration::open(&filename).unwrap(); + let e = db.get_file(Path::new("/")).unwrap().unwrap(); + assert_eq!(e.len(), u64::MAX); + } #[test] fn empty() { -- cgit v1.2.1 From 4913347201f4d00ccaf959c53357241d5bc3f9e0 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 3 May 2022 11:22:23 +0300 Subject: refactor: add a builder for file system entries The previous commit introduced a function to create FilesystemEntry values from arbitrary data. Previously one could only be created from std::fs::Metadata. This complicated our own testing, which (now) needs to construct an arbitrary entry structure. However, while the function added in the last commit was straightforward, it had 11 arguments, and that's hard to keep track of. Replace that function with an EntryBuilder struct, for clarity. Sponsored-by: author --- src/fsentry.rs | 201 +++++++++++++++++++++++++++++++++++++----------------- src/generation.rs | 27 +++----- 2 files changed, 145 insertions(+), 83 deletions(-) diff --git a/src/fsentry.rs b/src/fsentry.rs index a04a3de..276e3f9 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -69,57 +69,6 @@ pub enum FsEntryError { #[allow(clippy::len_without_is_empty)] impl FilesystemEntry { - /// Create an `FsEntry` from a file's metadata. - pub(crate) fn new( - path: &Path, - kind: FilesystemKind, - uid: u32, - gid: u32, - len: u64, - mode: u32, - mtime: i64, - mtime_ns: i64, - atime: i64, - atime_ns: i64, - cache: &mut UsersCache, - ) -> Result { - let symlink_target = if kind == FilesystemKind::Symlink { - debug!("reading symlink target for {:?}", path); - let target = - read_link(path).map_err(|err| FsEntryError::ReadLink(path.to_path_buf(), err))?; - Some(target) - } else { - None - }; - - let user: String = if let Some(user) = cache.get_user_by_uid(uid) { - user.name().to_string_lossy().to_string() - } else { - "".to_string() - }; - let group = if let Some(group) = cache.get_group_by_gid(gid) { - group.name().to_string_lossy().to_string() - } else { - "".to_string() - }; - - Ok(Self { - path: path.to_path_buf().into_os_string().into_vec(), - kind, - len, - mode, - mtime, - mtime_ns, - atime, - atime_ns, - symlink_target, - uid, - gid, - user, - group, - }) - } - /// Create an `FsEntry` from a file's metadata. pub fn from_metadata( path: &Path, @@ -127,19 +76,16 @@ impl FilesystemEntry { cache: &mut UsersCache, ) -> Result { let kind = FilesystemKind::from_file_type(meta.file_type()); - Self::new( - path, - kind, - meta.st_uid(), - meta.st_gid(), - meta.len(), - meta.st_mode(), - meta.st_mtime(), - meta.st_mtime_nsec(), - meta.st_atime(), - meta.st_atime_nsec(), - cache, - ) + Ok(EntryBuilder::new(kind) + .path(path.to_path_buf()) + .len(meta.len()) + .mode(meta.st_mode()) + .mtime(meta.st_mtime(), meta.st_mtime_nsec()) + .atime(meta.st_atime(), meta.st_atime_nsec()) + .user(meta.st_uid(), cache)? + .group(meta.st_uid(), cache)? + .symlink_target()? + .build()) } /// Return the kind of file the entry refers to. @@ -194,6 +140,133 @@ impl FilesystemEntry { } } +#[derive(Debug)] +pub(crate) struct EntryBuilder { + kind: FilesystemKind, + path: PathBuf, + len: u64, + + // 16 bits should be enough for a Unix mode_t. + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html + // However, it's 32 bits on Linux, so that's what we store. + mode: u32, + + // Linux can store file system time stamps in nanosecond + // resolution. We store them as two 64-bit integers. + mtime: i64, + mtime_ns: i64, + atime: i64, + atime_ns: i64, + + // The target of a symbolic link, if any. + symlink_target: Option, + + // User and group owning the file. We store them as both the + // numeric id and the textual name corresponding to the numeric id + // at the time of the backup. + uid: u32, + gid: u32, + user: String, + group: String, +} + +impl EntryBuilder { + pub(crate) fn new(kind: FilesystemKind) -> Self { + Self { + kind, + path: PathBuf::new(), + len: 0, + mode: 0, + mtime: 0, + mtime_ns: 0, + atime: 0, + atime_ns: 0, + symlink_target: None, + uid: 0, + user: "".to_string(), + gid: 0, + group: "".to_string(), + } + } + + pub(crate) fn build(self) -> FilesystemEntry { + FilesystemEntry { + kind: self.kind, + path: self.path.into_os_string().into_vec(), + len: self.len, + mode: self.mode, + mtime: self.mtime, + mtime_ns: self.mtime_ns, + atime: self.atime, + atime_ns: self.atime_ns, + symlink_target: self.symlink_target, + uid: self.uid, + user: self.user, + gid: self.gid, + group: self.group, + } + } + + pub(crate) fn path(mut self, path: PathBuf) -> Self { + self.path = path; + self + } + + pub(crate) fn len(mut self, len: u64) -> Self { + self.len = len; + self + } + + pub(crate) fn mode(mut self, mode: u32) -> Self { + self.mode = mode; + self + } + + pub(crate) fn mtime(mut self, secs: i64, nsec: i64) -> Self { + self.mtime = secs; + self.mtime_ns = nsec; + self + } + + pub(crate) fn atime(mut self, secs: i64, nsec: i64) -> Self { + self.atime = secs; + self.atime_ns = nsec; + self + } + + pub(crate) fn symlink_target(mut self) -> Result { + self.symlink_target = if self.kind == FilesystemKind::Symlink { + debug!("reading symlink target for {:?}", self.path); + let target = read_link(&self.path) + .map_err(|err| FsEntryError::ReadLink(self.path.clone(), err))?; + Some(target) + } else { + None + }; + Ok(self) + } + + pub(crate) fn user(mut self, uid: u32, cache: &mut UsersCache) -> Result { + self.uid = uid; + self.user = if let Some(user) = cache.get_user_by_uid(uid) { + user.name().to_string_lossy().to_string() + } else { + "".to_string() + }; + Ok(self) + } + + pub(crate) fn group(mut self, gid: u32, cache: &mut UsersCache) -> Result { + self.gid = gid; + self.group = if let Some(group) = cache.get_group_by_gid(gid) { + group.name().to_string_lossy().to_string() + } else { + "".to_string() + }; + Ok(self) + } +} + /// Different types of file system entries. #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum FilesystemKind { diff --git a/src/generation.rs b/src/generation.rs index cec3d14..477edc0 100644 --- a/src/generation.rs +++ b/src/generation.rs @@ -297,33 +297,22 @@ impl LocalGeneration { #[cfg(test)] mod test { use super::{LabelChecksumKind, LocalGeneration, NascentGeneration, Reason, SchemaVersion}; - use crate::fsentry::FilesystemEntry; + use crate::fsentry::EntryBuilder; use crate::fsentry::FilesystemKind; - use std::path::Path; + use std::path::PathBuf; use tempfile::{tempdir, NamedTempFile}; - use users::UsersCache; #[test] fn round_trips_u64_max() { let tmp = tempdir().unwrap(); let filename = tmp.path().join("test.db"); - let mut cache = UsersCache::new(); + let path = PathBuf::from("/"); let schema = SchemaVersion::new(0, 0); { - let e = FilesystemEntry::new( - Path::new("/"), - FilesystemKind::Directory, - 0, - 0, - u64::MAX, - 0, - 0, - 0, - 0, - 0, - &mut cache, - ) - .unwrap(); + let e = EntryBuilder::new(FilesystemKind::Directory) + .path(path.clone()) + .len(u64::MAX) + .build(); let mut gen = NascentGeneration::create(&filename, schema, LabelChecksumKind::Sha256).unwrap(); gen.insert(e, &[], Reason::IsNew, false).unwrap(); @@ -331,7 +320,7 @@ mod test { } let db = LocalGeneration::open(&filename).unwrap(); - let e = db.get_file(Path::new("/")).unwrap().unwrap(); + let e = db.get_file(&path).unwrap().unwrap(); assert_eq!(e.len(), u64::MAX); } -- cgit v1.2.1