From 7c0fc1d786cafcbd4e9e9659089e633a7fc7c092 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Thu, 17 Sep 2020 10:21:39 +0300
Subject: feat: add an in-memory index of chunks for searching

---
 src/chunkid.rs | 2 +-
 src/index.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs | 1 +
 3 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 src/index.rs

(limited to 'src')

diff --git a/src/chunkid.rs b/src/chunkid.rs
index f3af7cf..771cc8c 100644
--- a/src/chunkid.rs
+++ b/src/chunkid.rs
@@ -17,7 +17,7 @@ use uuid::Uuid;
 ///
 /// Because every identifier is meant to be different, there is no
 /// default value, since default values should be identical.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub struct ChunkId {
     id: String,
 }
diff --git a/src/index.rs b/src/index.rs
new file mode 100644
index 0000000..ed0183e
--- /dev/null
+++ b/src/index.rs
@@ -0,0 +1,97 @@
+use crate::chunkid::ChunkId;
+use std::collections::HashMap;
+use std::default::Default;
+
+/// A chunk index.
+///
+/// A chunk index lets the server quickly find chunks based on a
+/// string key/value pair, or whether they are generations.
+#[derive(Debug, Default)]
+pub struct Index {
+    map: HashMap<(String, String), Vec<ChunkId>>,
+    generations: Vec<ChunkId>,
+}
+
+impl Index {
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    pub fn len(&self) -> usize {
+        self.map.len()
+    }
+
+    pub fn insert(&mut self, id: ChunkId, key: &str, value: &str) {
+        let kv = kv(key, value);
+        if let Some(v) = self.map.get_mut(&kv) {
+            v.push(id)
+        } else {
+            self.map.insert(kv, vec![id]);
+        }
+    }
+
+    pub fn find(&self, key: &str, value: &str) -> Vec<ChunkId> {
+        let kv = kv(key, value);
+        if let Some(v) = self.map.get(&kv) {
+            v.clone()
+        } else {
+            vec![]
+        }
+    }
+
+    pub fn insert_generation(&mut self, id: ChunkId) {
+        self.generations.push(id)
+    }
+
+    pub fn find_generations(&self) -> Vec<ChunkId> {
+        self.generations.clone()
+    }
+}
+
+fn kv(key: &str, value: &str) -> (String, String) {
+    (key.to_string(), value.to_string())
+}
+
+#[cfg(test)]
+mod test {
+    use super::{ChunkId, Index};
+
+    #[test]
+    fn is_empty_initially() {
+        let idx = Index::default();
+        assert!(idx.is_empty());
+    }
+
+    #[test]
+    fn remembers_inserted() {
+        let id: ChunkId = "id001".parse().unwrap();
+        let mut idx = Index::default();
+        idx.insert(id.clone(), "sha256", "abc");
+        assert!(!idx.is_empty());
+        assert_eq!(idx.len(), 1);
+        let ids: Vec<ChunkId> = idx.find("sha256", "abc");
+        assert_eq!(ids, vec![id]);
+    }
+
+    #[test]
+    fn does_not_find_uninserted() {
+        let id: ChunkId = "id001".parse().unwrap();
+        let mut idx = Index::default();
+        idx.insert(id, "sha256", "abc");
+        assert_eq!(idx.find("sha256", "def").len(), 0)
+    }
+
+    #[test]
+    fn has_no_generations_initially() {
+        let idx = Index::default();
+        assert_eq!(idx.find_generations(), vec![]);
+    }
+
+    #[test]
+    fn remembers_generation() {
+        let id: ChunkId = "id001".parse().unwrap();
+        let mut idx = Index::default();
+        idx.insert_generation(id.clone());
+        assert_eq!(idx.find_generations(), vec![id]);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 3c99f83..720306d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
 pub
mod chunk;
 pub mod chunkid;
 pub mod chunkmeta;
+pub mod index;
--
cgit v1.2.1