diff options
author | Lars Wirzenius <liw@liw.fi> | 2016-08-13 18:06:26 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2016-08-14 10:13:13 +0300 |
commit | 022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0 (patch) | |
tree | 13fa85476f92a37fba12dafce6650fd9fef5c514 | |
parent | b88b51305f7b96077e5ee0517248dc45029c9d4f (diff) | |
download | obnam-022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0.tar.gz |
Break CowTree into many nodes
-rw-r--r-- | obnamlib/fmt_ga/cowtree.py | 118 | ||||
-rw-r--r-- | obnamlib/fmt_ga/cowtree_tests.py | 17 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf_store.py | 9 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf_store_tests.py | 3 |
4 files changed, 142 insertions, 5 deletions
diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py index 7500b089..f1f74385 100644 --- a/obnamlib/fmt_ga/cowtree.py +++ b/obnamlib/fmt_ga/cowtree.py @@ -16,6 +16,8 @@ # =*= License: GPL-3+ =*= +import copy + import obnamlib @@ -23,19 +25,125 @@ class CowTree(object): def __init__(self): self._store = None - self._leaf = obnamlib.CowLeaf() + self._leaf_list = _LeafList() + self._max_keys_per_leaf = 1024 # FIXME: This should be configurable? def set_leaf_store(self, leaf_store): self._store = leaf_store def set_list_node(self, leaf_id): - self._leaf = self._store.get_leaf(leaf_id) + fake_leaf = self._store.get_leaf(leaf_id) + self._leaf_list.from_dict(fake_leaf.lookup('leaf_list')) + + def set_max_leaf_size(self, max_keys): + assert max_keys >= 2 + self._max_keys_per_leaf = max_keys def lookup(self, key): - return self._leaf.lookup(key) + leaf_id = self._leaf_list.find_leaf_for_key(key) + if leaf_id is None: + return None + + leaf = self._store.get_leaf(leaf_id) + assert leaf is not None + return leaf.lookup(key) def insert(self, key, value): - self._leaf.insert(key, value) + leaf_id = self._leaf_list.find_leaf_for_key(key) + if leaf_id is None: + self._add_new_leaf(key, value) + else: + self._insert_into_leaf(leaf_id, key, value) + + def _add_new_leaf(self, key, value): + leaf = obnamlib.CowLeaf() + leaf.insert(key, value) + leaf_id = self._store.put_leaf(leaf) + self._leaf_list.insert_leaf(key, key, leaf_id) + + def _insert_into_leaf(self, leaf_id, key, value): + leaf = self._store.get_leaf(leaf_id) + assert leaf is not None + leaf.insert(key, value) + keys = list(sorted(leaf.keys())) + self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1]) + if len(leaf) > self._max_keys_per_leaf: + self._leaf_list.drop_leaf(leaf_id) + self._split_leaf(leaf) + + def _split_leaf(self, leaf): + sorted_keys = list(sorted(leaf.keys())) + n = len(sorted_keys) / 2 + + self._make_split_leaf(leaf, sorted_keys[:n]) + self._make_split_leaf(leaf, sorted_keys[n:]) + + def _make_split_leaf(self, leaf, sorted_keys): + new = obnamlib.CowLeaf() + for key in sorted_keys: + new.insert(key, leaf.lookup(key)) + new_id = self._store.put_leaf(new) + self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id) def commit(self): - return self._store.put_leaf(self._leaf) + fake_leaf = obnamlib.CowLeaf() + fake_leaf.insert('leaf_list', self._leaf_list.as_dict()) + list_id = self._store.put_leaf(fake_leaf) + self._store.flush() + return list_id + + +class _LeafList(object): + + def __init__(self): + self._leaf_list = [] + + def as_dict(self): + # This isn't really returning a dict, but that's OK. We only + # need to return something that serialise_object can handle. + return copy.deepcopy(self._leaf_list) + + def from_dict(self, some_dict): + self._leaf_list = some_dict + + def find_leaf_for_key(self, key): + # If there are no leaves, we can't pick one for key. + if not self._leaf_list: + return None + + # Pick last one if key is too big. + last = self._leaf_list[-1] + if key >= last['last_key']: + return last['id'] + + # Otherwise, pick first leaf whose last key >= key. We now + # know there is such a node. + for leaf_info in self._leaf_list: + if leaf_info['last_key'] >= key: + return leaf_info['id'] + + # If we get here, something's badly wrong. + assert False # pragma: no cover + + def insert_leaf(self, first_key, last_key, leaf_id): + leaf_info = { + 'first_key': first_key, + 'last_key': last_key, + 'id': leaf_id, + } + + self._leaf_list.append(leaf_info) + self._leaf_list.sort(key=lambda li: li['first_key']) + + def update_leaf(self, leaf_id, first_key, last_key): + for leaf_info in self._leaf_list: + if leaf_info['id'] == leaf_id: + leaf_info['first_key'] = first_key + leaf_info['last_key'] = last_key + + def drop_leaf(self, leaf_id): + self._leaf_list = [ + leaf_info + for leaf_info in self._leaf_list + if leaf_info['id'] != leaf_id + ] diff --git a/obnamlib/fmt_ga/cowtree_tests.py b/obnamlib/fmt_ga/cowtree_tests.py index 7e8dd114..b34ce2f0 100644 --- a/obnamlib/fmt_ga/cowtree_tests.py +++ b/obnamlib/fmt_ga/cowtree_tests.py @@ -37,6 +37,23 @@ class CowTreeTests(unittest.TestCase): self.cow.insert(key, value) self.assertEqual(self.cow.lookup(key), value) + def test_inserts_many_keys(self): + N = 10 + self.cow.set_max_leaf_size(N/3) + + keyvalues = [ + ('key-{}'.format(i), 'value-{}'.format(i)) + for i in range(N) + ] + + # Insert in reverse order in order to exercise all the code + # paths in _LeafList.find_leaf_for_key. + for key, value in reversed(keyvalues): + self.cow.insert(key, value) + + for key, value in keyvalues: + self.assertEqual(self.cow.lookup(key), value) + def test_commits_changes_persistently(self): key = 'fookey' value = 'barvalue' diff --git a/obnamlib/fmt_ga/leaf_store.py b/obnamlib/fmt_ga/leaf_store.py index b5cb5120..5d488e96 100644 --- a/obnamlib/fmt_ga/leaf_store.py +++ b/obnamlib/fmt_ga/leaf_store.py @@ -27,6 +27,9 @@ class LeafStoreInterface(object): # pragma: no cover def get_leaf(self, leaf_id): raise NotImplementedError() + def flush(self): + raise NotImplementedError() + class InMemoryLeafStore(LeafStoreInterface): @@ -42,6 +45,9 @@ class InMemoryLeafStore(LeafStoreInterface): def get_leaf(self, leaf_id): return self._leaves.get(leaf_id, None) + def flush(self): + pass + class LeafStore(LeafStoreInterface): # pragma: no cover @@ -58,3 +64,6 @@ class LeafStore(LeafStoreInterface): # pragma: no cover leaf = obnamlib.CowLeaf() leaf.from_dict(self._blob_store.get_blob(leaf_id)) return leaf + + def flush(self): + self._blob_store.flush() diff --git a/obnamlib/fmt_ga/leaf_store_tests.py b/obnamlib/fmt_ga/leaf_store_tests.py index 7eb3c8e4..9d1a8f71 100644 --- a/obnamlib/fmt_ga/leaf_store_tests.py +++ b/obnamlib/fmt_ga/leaf_store_tests.py @@ -40,6 +40,9 @@ class LeafStoreTests(object): def test_returns_None_if_leaf_is_missing(self): self.assertEqual(self.ls.get_leaf(42), None) + def test_has_flush(self): + self.assertEqual(self.ls.flush(), None) + class InMemoryLeafStoreTests(unittest.TestCase, LeafStoreTests): |