From 4966d7e7b45d0a33af899f8c0790097509ba8be1 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 25 Nov 2020 11:04:21 +0200 Subject: perf: store chunks on disk in a 3-level directory tree git does the same thing. This improves the wall clock time to run benchmark-store with a million chunks from 40.36 seconds to 18.70 seconds, on a system with NVMe and ext4. --- src/store.rs | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'src/store.rs') diff --git a/src/store.rs b/src/store.rs index 8d50b2f..a415d7a 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,6 +1,7 @@ use crate::chunk::DataChunk; use crate::chunkid::ChunkId; use crate::chunkmeta::ChunkMeta; +use anyhow::Context; use std::path::{Path, PathBuf}; /// Store chunks, with metadata, persistently. @@ -20,35 +21,58 @@ impl Store { } } - // Construct name for a file in the store from chunk id and suffix. - fn filename(&self, id: &ChunkId, suffix: &str) -> PathBuf { - self.dir.join(format!("{}.{}", id, suffix)) + // Construct names for the files in the store from chunk id. + // + // The name of the directory containing the files is returned + // separately to make it easier to create it if needed. + fn filenames(&self, id: &ChunkId) -> (PathBuf, PathBuf, PathBuf) { + let bytes = id.as_bytes(); + assert!(bytes.len() > 3); + let a = bytes[0]; + let b = bytes[1]; + let c = bytes[2]; + let dir = self.dir.join(format!("{}/{}/{}", a, b, c)); + let meta = dir.join(format!("{}.{}", id, "meta")); + let data = dir.join(format!("{}.{}", id, "data")); + (dir, meta, data) } /// Save a chunk into a store. 
pub fn save(&self, id: &ChunkId, meta: &ChunkMeta, chunk: &DataChunk) -> anyhow::Result<()> { - std::fs::write(&self.filename(id, "meta"), meta.to_json())?; - std::fs::write(&self.filename(id, "data"), chunk.data())?; + let (dir, metaname, dataname) = &self.filenames(id); + + if !dir.exists() { + let res = std::fs::create_dir_all(dir).into(); + if let Err(_) = res { + return res.with_context(|| format!("creating directory {}", dir.display())); + } + } + + std::fs::write(&metaname, meta.to_json())?; + std::fs::write(&dataname, chunk.data())?; Ok(()) } /// Load a chunk's metadata from a store. pub fn load_meta(&self, id: &ChunkId) -> anyhow::Result { - let meta = std::fs::read(&self.filename(id, "meta"))?; + let (_, metaname, _) = &self.filenames(id); + let meta = std::fs::read(&metaname)?; Ok(serde_json::from_slice(&meta)?) } /// Load a chunk from a store. pub fn load(&self, id: &ChunkId) -> anyhow::Result<(ChunkMeta, DataChunk)> { + let (_, _, dataname) = &self.filenames(id); let meta = self.load_meta(id)?; - let data = std::fs::read(&self.filename(id, "data"))?; + let data = std::fs::read(&dataname)?; Ok((meta, DataChunk::new(data))) } /// Delete a chunk from a store. pub fn delete(&self, id: &ChunkId) -> anyhow::Result<()> { - std::fs::remove_file(&self.filename(id, "meta"))?; - std::fs::remove_file(&self.filename(id, "data"))?; + let (_, metaname, dataname) = &self.filenames(id); + std::fs::remove_file(&metaname)?; + std::fs::remove_file(&dataname)?; Ok(()) } } -- cgit v1.2.1