diff options
author | Lars Wirzenius <liw@liw.fi> | 2016-08-13 18:06:26 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2016-08-14 10:13:13 +0300 |
commit | 022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0 (patch) | |
tree | 13fa85476f92a37fba12dafce6650fd9fef5c514 /obnamlib/fmt_ga/cowtree.py | |
parent | b88b51305f7b96077e5ee0517248dc45029c9d4f (diff) | |
download | obnam-022aa0cba1102fa234c8b5d9f4fa9e3fbc108cc0.tar.gz |
Break CowTree into many nodes
Diffstat (limited to 'obnamlib/fmt_ga/cowtree.py')
-rw-r--r-- | obnamlib/fmt_ga/cowtree.py | 118 |
1 files changed, 113 insertions, 5 deletions
diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py index 7500b089..f1f74385 100644 --- a/obnamlib/fmt_ga/cowtree.py +++ b/obnamlib/fmt_ga/cowtree.py @@ -16,6 +16,8 @@ # =*= License: GPL-3+ =*= +import copy + import obnamlib @@ -23,19 +25,125 @@ class CowTree(object): def __init__(self): self._store = None - self._leaf = obnamlib.CowLeaf() + self._leaf_list = _LeafList() + self._max_keys_per_leaf = 1024 # FIXME: This should be configurable? def set_leaf_store(self, leaf_store): self._store = leaf_store def set_list_node(self, leaf_id): - self._leaf = self._store.get_leaf(leaf_id) + fake_leaf = self._store.get_leaf(leaf_id) + self._leaf_list.from_dict(fake_leaf.lookup('leaf_list')) + + def set_max_leaf_size(self, max_keys): + assert max_keys >= 2 + self._max_keys_per_leaf = max_keys def lookup(self, key): - return self._leaf.lookup(key) + leaf_id = self._leaf_list.find_leaf_for_key(key) + if leaf_id is None: + return None + + leaf = self._store.get_leaf(leaf_id) + assert leaf is not None + return leaf.lookup(key) def insert(self, key, value): - self._leaf.insert(key, value) + leaf_id = self._leaf_list.find_leaf_for_key(key) + if leaf_id is None: + self._add_new_leaf(key, value) + else: + self._insert_into_leaf(leaf_id, key, value) + + def _add_new_leaf(self, key, value): + leaf = obnamlib.CowLeaf() + leaf.insert(key, value) + leaf_id = self._store.put_leaf(leaf) + self._leaf_list.insert_leaf(key, key, leaf_id) + + def _insert_into_leaf(self, leaf_id, key, value): + leaf = self._store.get_leaf(leaf_id) + assert leaf is not None + leaf.insert(key, value) + keys = list(sorted(leaf.keys())) + self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1]) + if len(leaf) > self._max_keys_per_leaf: + self._leaf_list.drop_leaf(leaf_id) + self._split_leaf(leaf) + + def _split_leaf(self, leaf): + sorted_keys = list(sorted(leaf.keys())) + n = len(sorted_keys) / 2 + + self._make_split_leaf(leaf, sorted_keys[:n]) + self._make_split_leaf(leaf, sorted_keys[n:]) + + def _make_split_leaf(self, leaf, sorted_keys): + new = obnamlib.CowLeaf() + for key in sorted_keys: + new.insert(key, leaf.lookup(key)) + new_id = self._store.put_leaf(new) + self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id) def commit(self): - return self._store.put_leaf(self._leaf) + fake_leaf = obnamlib.CowLeaf() + fake_leaf.insert('leaf_list', self._leaf_list.as_dict()) + list_id = self._store.put_leaf(fake_leaf) + self._store.flush() + return list_id + + +class _LeafList(object): + + def __init__(self): + self._leaf_list = [] + + def as_dict(self): + # This isn't really returning a dict, but that's OK. We only + # need to return something that serialise_object can handle. + return copy.deepcopy(self._leaf_list) + + def from_dict(self, some_dict): + self._leaf_list = some_dict + + def find_leaf_for_key(self, key): + # If there are no leaves, we can't pick one for key. + if not self._leaf_list: + return None + + # Pick last one if key is too big. + last = self._leaf_list[-1] + if key >= last['last_key']: + return last['id'] + + # Otherwise, pick first leaf whose last key >= key. We now + # know there is such a node. + for leaf_info in self._leaf_list: + if leaf_info['last_key'] >= key: + return leaf_info['id'] + + # If we get here, something's badly wrong. + assert False # pragma: no cover + + def insert_leaf(self, first_key, last_key, leaf_id): + leaf_info = { + 'first_key': first_key, + 'last_key': last_key, + 'id': leaf_id, + } + + self._leaf_list.append(leaf_info) + self._leaf_list.sort(key=lambda li: li['first_key']) + + def update_leaf(self, leaf_id, first_key, last_key): + for leaf_info in self._leaf_list: + if leaf_info['id'] == leaf_id: + leaf_info['first_key'] = first_key + leaf_info['last_key'] = last_key + + def drop_leaf(self, leaf_id): + self._leaf_list = [ + leaf_info + for leaf_info in self._leaf_list + if leaf_info['id'] != leaf_id + ] |