summaryrefslogtreecommitdiff
path: root/obnamlib/fmt_ga/cowtree.py
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2016-08-13 18:06:26 +0300
committerLars Wirzenius <liw@liw.fi>2016-08-14 10:13:13 +0300
commit022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0 (patch)
tree13fa85476f92a37fba12dafce6650fd9fef5c514 /obnamlib/fmt_ga/cowtree.py
parentb88b51305f7b96077e5ee0517248dc45029c9d4f (diff)
downloadobnam-022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0.tar.gz
Break CowTree into many nodes
Diffstat (limited to 'obnamlib/fmt_ga/cowtree.py')
-rw-r--r--obnamlib/fmt_ga/cowtree.py118
1 files changed, 113 insertions, 5 deletions
diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py
index 7500b089..f1f74385 100644
--- a/obnamlib/fmt_ga/cowtree.py
+++ b/obnamlib/fmt_ga/cowtree.py
@@ -16,6 +16,8 @@
# =*= License: GPL-3+ =*=
+import copy
+
import obnamlib
@@ -23,19 +25,125 @@ class CowTree(object):
def __init__(self):
self._store = None
- self._leaf = obnamlib.CowLeaf()
+ self._leaf_list = _LeafList()
+ self._max_keys_per_leaf = 1024 # FIXME: This should be configurable?
def set_leaf_store(self, leaf_store):
self._store = leaf_store
def set_list_node(self, leaf_id):
- self._leaf = self._store.get_leaf(leaf_id)
+ fake_leaf = self._store.get_leaf(leaf_id)
+ self._leaf_list.from_dict(fake_leaf.lookup('leaf_list'))
+
+ def set_max_leaf_size(self, max_keys):
+ assert max_keys >= 2
+ self._max_keys_per_leaf = max_keys
def lookup(self, key):
- return self._leaf.lookup(key)
+ leaf_id = self._leaf_list.find_leaf_for_key(key)
+ if leaf_id is None:
+ return None
+
+ leaf = self._store.get_leaf(leaf_id)
+ assert leaf is not None
+ return leaf.lookup(key)
def insert(self, key, value):
- self._leaf.insert(key, value)
+ leaf_id = self._leaf_list.find_leaf_for_key(key)
+ if leaf_id is None:
+ self._add_new_leaf(key, value)
+ else:
+ self._insert_into_leaf(leaf_id, key, value)
+
+ def _add_new_leaf(self, key, value):
+ leaf = obnamlib.CowLeaf()
+ leaf.insert(key, value)
+ leaf_id = self._store.put_leaf(leaf)
+ self._leaf_list.insert_leaf(key, key, leaf_id)
+
+ def _insert_into_leaf(self, leaf_id, key, value):
+ leaf = self._store.get_leaf(leaf_id)
+ assert leaf is not None
+ leaf.insert(key, value)
+ keys = list(sorted(leaf.keys()))
+ self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1])
+ if len(leaf) > self._max_keys_per_leaf:
+ self._leaf_list.drop_leaf(leaf_id)
+ self._split_leaf(leaf)
+
+ def _split_leaf(self, leaf):
+ sorted_keys = list(sorted(leaf.keys()))
+ n = len(sorted_keys) / 2
+
+ self._make_split_leaf(leaf, sorted_keys[:n])
+ self._make_split_leaf(leaf, sorted_keys[n:])
+
+ def _make_split_leaf(self, leaf, sorted_keys):
+ new = obnamlib.CowLeaf()
+ for key in sorted_keys:
+ new.insert(key, leaf.lookup(key))
+ new_id = self._store.put_leaf(new)
+ self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id)
def commit(self):
- return self._store.put_leaf(self._leaf)
+ fake_leaf = obnamlib.CowLeaf()
+ fake_leaf.insert('leaf_list', self._leaf_list.as_dict())
+ list_id = self._store.put_leaf(fake_leaf)
+ self._store.flush()
+ return list_id
+
+
+class _LeafList(object):
+
+ def __init__(self):
+ self._leaf_list = []
+
+ def as_dict(self):
+ # This isn't really returning a dict, but that's OK. We only
+ # need to return something that serialise_object can handle.
+ return copy.deepcopy(self._leaf_list)
+
+ def from_dict(self, some_dict):
+ self._leaf_list = some_dict
+
+ def find_leaf_for_key(self, key):
+ # If there are no leaves, we can't pick one for key.
+ if not self._leaf_list:
+ return None
+
+ # Pick last one if key is too big.
+ last = self._leaf_list[-1]
+ if key >= last['last_key']:
+ return last['id']
+
+ # Otherwise, pick first leaf whose last key >= key. We now
+ # know there is such a node.
+ for leaf_info in self._leaf_list:
+ if leaf_info['last_key'] >= key:
+ return leaf_info['id']
+
+ # If we get here, something's badly wrong.
+ assert False # pragma: no cover
+
+ def insert_leaf(self, first_key, last_key, leaf_id):
+ leaf_info = {
+ 'first_key': first_key,
+ 'last_key': last_key,
+ 'id': leaf_id,
+ }
+
+ self._leaf_list.append(leaf_info)
+ self._leaf_list.sort(key=lambda li: li['first_key'])
+
+ def update_leaf(self, leaf_id, first_key, last_key):
+ for leaf_info in self._leaf_list:
+ if leaf_info['id'] == leaf_id:
+ leaf_info['first_key'] = first_key
+ leaf_info['last_key'] = last_key
+
+ def drop_leaf(self, leaf_id):
+ self._leaf_list = [
+ leaf_info
+ for leaf_info in self._leaf_list
+ if leaf_info['id'] != leaf_id
+ ]