summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2016-08-13 18:06:26 +0300
committerLars Wirzenius <liw@liw.fi>2016-08-14 10:13:13 +0300
commit022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0 (patch)
tree13fa85476f92a37fba12dafce6650fd9fef5c514
parentb88b51305f7b96077e5ee0517248dc45029c9d4f (diff)
downloadobnam-022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0.tar.gz
Break CowTree into many nodes
-rw-r--r--obnamlib/fmt_ga/cowtree.py118
-rw-r--r--obnamlib/fmt_ga/cowtree_tests.py17
-rw-r--r--obnamlib/fmt_ga/leaf_store.py9
-rw-r--r--obnamlib/fmt_ga/leaf_store_tests.py3
4 files changed, 142 insertions, 5 deletions
diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py
index 7500b089..f1f74385 100644
--- a/obnamlib/fmt_ga/cowtree.py
+++ b/obnamlib/fmt_ga/cowtree.py
@@ -16,6 +16,8 @@
# =*= License: GPL-3+ =*=
+import copy
+
import obnamlib
@@ -23,19 +25,125 @@ class CowTree(object):
def __init__(self):
self._store = None
- self._leaf = obnamlib.CowLeaf()
+ self._leaf_list = _LeafList()
+ self._max_keys_per_leaf = 1024 # FIXME: This should be configurable?
def set_leaf_store(self, leaf_store):
self._store = leaf_store
def set_list_node(self, leaf_id):
- self._leaf = self._store.get_leaf(leaf_id)
+ fake_leaf = self._store.get_leaf(leaf_id)
+ self._leaf_list.from_dict(fake_leaf.lookup('leaf_list'))
+
+ def set_max_leaf_size(self, max_keys):
+ assert max_keys >= 2
+ self._max_keys_per_leaf = max_keys
def lookup(self, key):
- return self._leaf.lookup(key)
+ leaf_id = self._leaf_list.find_leaf_for_key(key)
+ if leaf_id is None:
+ return None
+
+ leaf = self._store.get_leaf(leaf_id)
+ assert leaf is not None
+ return leaf.lookup(key)
def insert(self, key, value):
- self._leaf.insert(key, value)
+ leaf_id = self._leaf_list.find_leaf_for_key(key)
+ if leaf_id is None:
+ self._add_new_leaf(key, value)
+ else:
+ self._insert_into_leaf(leaf_id, key, value)
+
+ def _add_new_leaf(self, key, value):
+ leaf = obnamlib.CowLeaf()
+ leaf.insert(key, value)
+ leaf_id = self._store.put_leaf(leaf)
+ self._leaf_list.insert_leaf(key, key, leaf_id)
+
+ def _insert_into_leaf(self, leaf_id, key, value):
+ leaf = self._store.get_leaf(leaf_id)
+ assert leaf is not None
+ leaf.insert(key, value)
+ keys = list(sorted(leaf.keys()))
+ self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1])
+ if len(leaf) > self._max_keys_per_leaf:
+ self._leaf_list.drop_leaf(leaf_id)
+ self._split_leaf(leaf)
+
+ def _split_leaf(self, leaf):
+ sorted_keys = list(sorted(leaf.keys()))
+ n = len(sorted_keys) / 2
+
+ self._make_split_leaf(leaf, sorted_keys[:n])
+ self._make_split_leaf(leaf, sorted_keys[n:])
+
+ def _make_split_leaf(self, leaf, sorted_keys):
+ new = obnamlib.CowLeaf()
+ for key in sorted_keys:
+ new.insert(key, leaf.lookup(key))
+ new_id = self._store.put_leaf(new)
+ self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id)
def commit(self):
- return self._store.put_leaf(self._leaf)
+ fake_leaf = obnamlib.CowLeaf()
+ fake_leaf.insert('leaf_list', self._leaf_list.as_dict())
+ list_id = self._store.put_leaf(fake_leaf)
+ self._store.flush()
+ return list_id
+
+
+class _LeafList(object):
+
+ def __init__(self):
+ self._leaf_list = []
+
+ def as_dict(self):
+ # This isn't really returning a dict, but that's OK. We only
+ # need to return something that serialise_object can handle.
+ return copy.deepcopy(self._leaf_list)
+
+ def from_dict(self, some_dict):
+ self._leaf_list = some_dict
+
+ def find_leaf_for_key(self, key):
+ # If there are no leaves, we can't pick one for key.
+ if not self._leaf_list:
+ return None
+
+ # Pick last one if key is too big.
+ last = self._leaf_list[-1]
+ if key >= last['last_key']:
+ return last['id']
+
+ # Otherwise, pick first leaf whose last key >= key. We now
+ # know there is such a node.
+ for leaf_info in self._leaf_list:
+ if leaf_info['last_key'] >= key:
+ return leaf_info['id']
+
+ # If we get here, something's badly wrong.
+ assert False # pragma: no cover
+
+ def insert_leaf(self, first_key, last_key, leaf_id):
+ leaf_info = {
+ 'first_key': first_key,
+ 'last_key': last_key,
+ 'id': leaf_id,
+ }
+
+ self._leaf_list.append(leaf_info)
+ self._leaf_list.sort(key=lambda li: li['first_key'])
+
+ def update_leaf(self, leaf_id, first_key, last_key):
+ for leaf_info in self._leaf_list:
+ if leaf_info['id'] == leaf_id:
+ leaf_info['first_key'] = first_key
+ leaf_info['last_key'] = last_key
+
+ def drop_leaf(self, leaf_id):
+ self._leaf_list = [
+ leaf_info
+ for leaf_info in self._leaf_list
+ if leaf_info['id'] != leaf_id
+ ]
diff --git a/obnamlib/fmt_ga/cowtree_tests.py b/obnamlib/fmt_ga/cowtree_tests.py
index 7e8dd114..b34ce2f0 100644
--- a/obnamlib/fmt_ga/cowtree_tests.py
+++ b/obnamlib/fmt_ga/cowtree_tests.py
@@ -37,6 +37,23 @@ class CowTreeTests(unittest.TestCase):
self.cow.insert(key, value)
self.assertEqual(self.cow.lookup(key), value)
+ def test_inserts_many_keys(self):
+ N = 10
+ self.cow.set_max_leaf_size(N/3)
+
+ keyvalues = [
+ ('key-{}'.format(i), 'value-{}'.format(i))
+ for i in range(N)
+ ]
+
+ # Insert in reverse order in order to exercise all the code
+ # paths in _LeafList.find_leaf_for_key.
+ for key, value in reversed(keyvalues):
+ self.cow.insert(key, value)
+
+ for key, value in keyvalues:
+ self.assertEqual(self.cow.lookup(key), value)
+
def test_commits_changes_persistently(self):
key = 'fookey'
value = 'barvalue'
diff --git a/obnamlib/fmt_ga/leaf_store.py b/obnamlib/fmt_ga/leaf_store.py
index b5cb5120..5d488e96 100644
--- a/obnamlib/fmt_ga/leaf_store.py
+++ b/obnamlib/fmt_ga/leaf_store.py
@@ -27,6 +27,9 @@ class LeafStoreInterface(object): # pragma: no cover
def get_leaf(self, leaf_id):
raise NotImplementedError()
+ def flush(self):
+ raise NotImplementedError()
+
class InMemoryLeafStore(LeafStoreInterface):
@@ -42,6 +45,9 @@ class InMemoryLeafStore(LeafStoreInterface):
def get_leaf(self, leaf_id):
return self._leaves.get(leaf_id, None)
+ def flush(self):
+ pass
+
class LeafStore(LeafStoreInterface): # pragma: no cover
@@ -58,3 +64,6 @@ class LeafStore(LeafStoreInterface): # pragma: no cover
leaf = obnamlib.CowLeaf()
leaf.from_dict(self._blob_store.get_blob(leaf_id))
return leaf
+
+ def flush(self):
+ self._blob_store.flush()
diff --git a/obnamlib/fmt_ga/leaf_store_tests.py b/obnamlib/fmt_ga/leaf_store_tests.py
index 7eb3c8e4..9d1a8f71 100644
--- a/obnamlib/fmt_ga/leaf_store_tests.py
+++ b/obnamlib/fmt_ga/leaf_store_tests.py
@@ -40,6 +40,9 @@ class LeafStoreTests(object):
def test_returns_None_if_leaf_is_missing(self):
self.assertEqual(self.ls.get_leaf(42), None)
+ def test_has_flush(self):
+ self.assertEqual(self.ls.flush(), None)
+
class InMemoryLeafStoreTests(unittest.TestCase, LeafStoreTests):