From 62b533e319f8a67db908a541c48b2d3a25edeb58 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 26 Dec 2020 10:05:58 +0200 Subject: refactor: rename FileSystemEntry::path to pathbuf This is a step towards changing how filenames are stored in FileSystemEntry. --- src/client.rs | 6 +++--- src/cmd/backup.rs | 5 +++-- src/cmd/restore.rs | 11 ++++++----- src/fsentry.rs | 4 ++-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/client.rs b/src/client.rs index af8d65e..3149ec5 100644 --- a/src/client.rs +++ b/src/client.rs @@ -62,7 +62,7 @@ impl BackupClient { ) -> anyhow::Result<(FilesystemEntry, Vec)> { debug!("entry: {:?}", e); let ids = match e.kind() { - FilesystemKind::Regular => self.read_file(e.path(), size)?, + FilesystemKind::Regular => self.read_file(e.pathbuf(), size)?, FilesystemKind::Directory => vec![], FilesystemKind::Symlink => vec![], }; @@ -70,7 +70,7 @@ impl BackupClient { } pub fn upload_generation(&self, filename: &Path, size: usize) -> anyhow::Result { - let ids = self.read_file(filename, size)?; + let ids = self.read_file(filename.to_path_buf(), size)?; let gen = GenerationChunk::new(ids); let data = gen.to_data_chunk()?; let meta = ChunkMeta::new_generation(&sha256(data.data()), "timestamp"); @@ -78,7 +78,7 @@ impl BackupClient { Ok(gen_id) } - fn read_file(&self, filename: &Path, size: usize) -> anyhow::Result> { + fn read_file(&self, filename: PathBuf, size: usize) -> anyhow::Result> { info!("uploading {}", filename.display()); let file = std::fs::File::open(filename)?; let chunker = Chunker::new(size, file); diff --git a/src/cmd/backup.rs b/src/cmd/backup.rs index 3303566..2a294f5 100644 --- a/src/cmd/backup.rs +++ b/src/cmd/backup.rs @@ -30,8 +30,9 @@ pub fn backup(config: &ClientConfig, buffer_size: usize) -> anyhow::Result<()> { match entry { Err(err) => Err(err), Ok(entry) => { - info!("backup: {}", entry.path().display()); - progress.set_message(&format!("{}", entry.path().display())); + let path = 
&entry.pathbuf(); + info!("backup: {}", path.display()); + progress.set_message(&format!("{}", path.display())); client.upload_filesystem_entry(entry, buffer_size) } } diff --git a/src/cmd/restore.rs b/src/cmd/restore.rs index b4a8f2b..da654fd 100644 --- a/src/cmd/restore.rs +++ b/src/cmd/restore.rs @@ -81,7 +81,7 @@ fn restore_generation( progress: &ProgressBar, ) -> anyhow::Result<()> { debug!("restoring {:?}", entry); - progress.set_message(&format!("{}", entry.path().display())); + progress.set_message(&format!("{}", entry.pathbuf().display())); progress.inc(1); let to = restored_path(entry, to)?; @@ -112,10 +112,11 @@ fn restore_directory_metadata(entry: &FilesystemEntry, to: &Path) -> anyhow::Res } fn restored_path(entry: &FilesystemEntry, to: &Path) -> anyhow::Result { - let path = if entry.path().is_absolute() { - entry.path().strip_prefix("/")? + let path = &entry.pathbuf(); + let path = if path.is_absolute() { + path.strip_prefix("/")? } else { - entry.path() + path }; Ok(to.join(path)) } @@ -158,7 +159,7 @@ fn restore_symlink(path: &Path, entry: &FilesystemEntry) -> anyhow::Result<()> { } fn restore_metadata(path: &Path, entry: &FilesystemEntry) -> anyhow::Result<()> { - debug!("restoring metadata for {}", entry.path().display()); + debug!("restoring metadata for {}", entry.pathbuf().display()); let handle = File::open(path)?; diff --git a/src/fsentry.rs b/src/fsentry.rs index c66e3e0..844642a 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -60,8 +60,8 @@ impl FilesystemEntry { self.kind } - pub fn path(&self) -> &Path { - &self.path + pub fn pathbuf(&self) -> PathBuf { + self.path.to_path_buf() } pub fn len(&self) -> u64 { -- cgit v1.2.1 From 2f95edc25f127fd5ac1fb9df96bf8489dbece3ec Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 26 Dec 2020 10:09:44 +0200 Subject: feat! store pathnames as vectors of bytes This is the most generic way to store filenames. 
--- src/fsentry.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/fsentry.rs b/src/fsentry.rs index 844642a..471b1bd 100644 --- a/src/fsentry.rs +++ b/src/fsentry.rs @@ -1,7 +1,9 @@ use serde::{Deserialize, Serialize}; +use std::ffi::OsString; use std::fs::read_link; use std::fs::{FileType, Metadata}; use std::os::linux::fs::MetadataExt; +use std::os::unix::ffi::OsStringExt; use std::path::{Path, PathBuf}; /// A file system entry. @@ -16,7 +18,7 @@ use std::path::{Path, PathBuf}; #[derive(Debug, Serialize, Deserialize)] pub struct FilesystemEntry { kind: FilesystemKind, - path: PathBuf, + path: Vec, len: u64, // 16 bits should be enough for a Unix mode_t. @@ -40,7 +42,7 @@ impl FilesystemEntry { pub fn from_metadata(path: &Path, meta: &Metadata) -> anyhow::Result { let kind = FilesystemKind::from_file_type(meta.file_type()); Ok(Self { - path: path.to_path_buf(), + path: path.to_path_buf().into_os_string().into_vec(), kind: FilesystemKind::from_file_type(meta.file_type()), len: meta.len(), mode: meta.st_mode(), @@ -61,7 +63,8 @@ impl FilesystemEntry { } pub fn pathbuf(&self) -> PathBuf { - self.path.to_path_buf() + let path = self.path.clone(); + PathBuf::from(OsString::from_vec(path)) } pub fn len(&self) -> u64 { -- cgit v1.2.1 From 96831d81fefcb76f2b33bc97b6206f594a7fcd5f Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 26 Dec 2020 10:25:59 +0200 Subject: test: add verification test for non-UTF8 filenames --- obnam.md | 19 +++++++++++++++++++ subplot/data.py | 6 ++++++ subplot/data.yaml | 3 +++ 3 files changed, 28 insertions(+) diff --git a/obnam.md b/obnam.md index fbbf994..6706fb7 100644 --- a/obnam.md +++ b/obnam.md @@ -876,6 +876,25 @@ given a manifest of the directory live restored in rest in rest.yaml then files live.yaml and rest.yaml match ~~~ +## Tricky filenames + +Obnam needs to handle all filenames the underlying operating and file +system can handle. 
This scenario verifies it can handle a filename +that consists of a single byte with its top bit set. This is not +ASCII, and it's not UTF-8. + +~~~scenario +given an installed obnam +and a running chunk server +and a client config based on metadata.yaml +and a file in live with a non-UTF8 filename +and a manifest of the directory live in live.yaml +when I run obnam --config metadata.yaml backup +then backup generation is GEN +when I invoke obnam --config metadata.yaml restore rest +given a manifest of the directory live restored in rest in rest.yaml +then files live.yaml and rest.yaml match +~~~ diff --git a/subplot/data.py b/subplot/data.py index ba3636c..a24cd0c 100644 --- a/subplot/data.py +++ b/subplot/data.py @@ -13,6 +13,12 @@ def create_file_with_random_data(ctx, filename=None): f.write(data) +def create_nonutf8_filename(ctx, dirname=None): + filename = "\x88" + os.mkdir(dirname) + open(filename, "wb").close() + + def chmod_file(ctx, filename=None, mode=None): os.chmod(filename, int(mode, 8)) diff --git a/subplot/data.yaml b/subplot/data.yaml index 32c9cd5..7659319 100644 --- a/subplot/data.yaml +++ b/subplot/data.yaml @@ -6,6 +6,9 @@ - given: "a file {filename} containing some random data" function: create_file_with_random_data +- given: "a file in {dirname} with a non-UTF8 filename" + function: create_nonutf8_filename + - given: file {filename} has mode {mode} function: chmod_file -- cgit v1.2.1