diff options
author | Lars Wirzenius <liw@liw.fi> | 2011-06-06 15:23:23 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2011-06-06 15:23:23 +0100 |
commit | fa58a97e58aeba1ce40a7bb701df25ce8ee084ac (patch) | |
tree | 9db66ef5f79c41186fb246ec74d668c34ff42699 | |
parent | 1685376963e33353342eeb697aa68c5b17f6f06d (diff) | |
download | larch-fa58a97e58aeba1ce40a7bb701df25ce8ee084ac.tar.gz |
Documentation of BTree class.
-rw-r--r-- | doc/index.rst | 2 | ||||
-rw-r--r-- | larch/forest_tests.py | 4 | ||||
-rw-r--r-- | larch/tree.py | 53 | ||||
-rw-r--r-- | larch/tree_tests.py | 12 |
4 files changed, 37 insertions, 34 deletions
diff --git a/doc/index.rst b/doc/index.rst index fd39563..e8799aa 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -29,7 +29,7 @@ only possible within a forest. Thus, also, only trees in the same forest can share nodes. All **keys** in a all trees in a forest must be string of the same size. **Values** are strings and are stored in **nodes**, and -can be of any size, almost up to the size of a node. +can be of any size, almost up to half the size of a node. When creating a forest, you must specify the sizes of keys and nodes, and the directory in which everything gets stored:: diff --git a/larch/forest_tests.py b/larch/forest_tests.py index e1ab67c..585d546 100644 --- a/larch/forest_tests.py +++ b/larch/forest_tests.py @@ -55,8 +55,8 @@ class ForestTests(unittest.TestCase): def test_clones_do_not_clash_in_new_node_ids(self): t1 = self.forest.new_tree() t2 = self.forest.new_tree(t1) - node1 = t1.new_leaf([], []) - node2 = t2.new_leaf([], []) + node1 = t1._new_leaf([], []) + node2 = t2._new_leaf([], []) self.assertEqual(node1.id + 1, node2.id) def test_is_persistent(self): diff --git a/larch/tree.py b/larch/tree.py index 370b3ea..4bf7b72 100644 --- a/larch/tree.py +++ b/larch/tree.py @@ -51,12 +51,15 @@ class ValueTooLarge(Exception): class BTree(object): - '''B-tree. + '''A balanced search tree (copy-on-write B-tree). - The nodes are stored in an external node store; see the NodeStore - class. Key sizes are fixed, and given in bytes. Values may be of - any size up to slightly less than half the maximum size of a node. - See NodeStore.max_value_size for the exact value. + The tree belongs to a forest. The tree nodes are stored in an + external node store; see the ``NodeStore`` class. + + ``root_id`` gives the id of the root node of the tree. The + root node must be unique to this tree, as it is modified in + place. ``root_id`` may also be ``None``, in which case a + new node is created automatically to serve as the root node. ''' @@ -74,27 +77,27 @@ class BTree(object): tracing.trace('init BTree %s with root_id %s' % (self, root_id)) - def check_key_size(self, key): + def _check_key_size(self, key): if len(key) != self.node_store.codec.key_bytes: raise KeySizeMismatch(key, self.node_store.codec.key_bytes) - def check_value_size(self, value): + def _check_value_size(self, value): if len(value) > self.node_store.max_value_size: raise ValueTooLarge(value, self.node_store.max_value_size) - def new_id(self): + def _new_id(self): '''Generate a new node identifier.''' return self.forest._new_id() - def new_leaf(self, keys, values): + def _new_leaf(self, keys, values): '''Create a new leaf node.''' - node = larch.LeafNode(self.new_id(), keys, values) + node = larch.LeafNode(self._new_id(), keys, values) tracing.trace('id=%s' % node.id) return node def new_index(self, keys, values): '''Create a new index node.''' - index = larch.IndexNode(self.new_id(), keys, values) + index = larch.IndexNode(self._new_id(), keys, values) for child_id in values: self.increment(child_id) tracing.trace('id=%s' % index.id) @@ -134,7 +137,7 @@ class BTree(object): ''' - self.check_key_size(key) + self._check_key_size(key) node = self.root while isinstance(node, larch.IndexNode): @@ -156,8 +159,8 @@ class BTree(object): ''' - self.check_key_size(minkey) - self.check_key_size(maxkey) + self._check_key_size(minkey) + self._check_key_size(maxkey) if self.root is not None: for pair in self._lookup_range(self.root.id, minkey, maxkey): yield pair @@ -182,8 +185,8 @@ class BTree(object): ''' - self.check_key_size(minkey) - self.check_key_size(maxkey) + self._check_key_size(minkey) + self._check_key_size(maxkey) if self.root is None: return True return self._range_is_empty(self.root.id, minkey, maxkey) @@ -208,7 +211,7 @@ class BTree(object): elif isinstance(node, larch.IndexNode): new = self.new_index(node.keys(), node.values()) else: - new = self.new_leaf(node.keys(), node.values()) + new = self._new_leaf(node.keys(), node.values()) new.size = node.size return new @@ -222,14 +225,14 @@ class BTree(object): tracing.trace('key=%s' % repr(key)) tracing.trace('value=%s' % repr(value)) - self.check_key_size(key) - self.check_value_size(value) + self._check_key_size(key) + self._check_value_size(value) # Is the tree empty? This needs special casing to keep # _insert_into_index simpler. if self.root is None or len(self.root) == 0: tracing.trace('tree is empty') - leaf = self.new_leaf([key], [value]) + leaf = self._new_leaf([key], [value]) self.put_node(leaf) if self.root is None: new_root = self.new_index([key], [leaf.id]) @@ -292,7 +295,7 @@ class BTree(object): n = len(new_index) / 2 keys = new_index.keys()[n:] values = new_index.values()[n:] - new = larch.IndexNode(self.new_id(), keys, values) + new = larch.IndexNode(self._new_id(), keys, values) tracing.trace('new index node id=%s' % new.id) for k in keys: new_index.remove(k) @@ -327,7 +330,7 @@ class BTree(object): values = new.values() n = len(keys) / 2 - new2 = self.new_leaf(keys[n:], values[n:]) + new2 = self._new_leaf(keys[n:], values[n:]) for key in new2: new.remove(key) assert size(new) > 0 @@ -363,7 +366,7 @@ class BTree(object): ''' tracing.trace('key=%s' % repr(key)) - self.check_key_size(key) + self._check_key_size(key) if self.root is None: tracing.trace('no root') @@ -486,8 +489,8 @@ class BTree(object): ''' - self.check_key_size(minkey) - self.check_key_size(maxkey) + self._check_key_size(minkey) + self._check_key_size(maxkey) keys = [k for k, v in self.lookup_range(minkey, maxkey)] for key in keys: self.remove(key) diff --git a/larch/tree_tests.py b/larch/tree_tests.py index 4fe9b8e..04385fb 100644 --- a/larch/tree_tests.py +++ b/larch/tree_tests.py @@ -73,7 +73,7 @@ class BTreeTests(unittest.TestCase): self.dump = False def test_shadow_increments_childrens_refcounts(self): - leaf = self.tree.new_leaf(['foo'], ['bar']) + leaf = self.tree._new_leaf(['foo'], ['bar']) index = self.tree.new_index([leaf.first_key()], [leaf.id]) self.assertEqual(self.ns.get_refcount(leaf.id), 1) self.ns.set_refcount(index.id, 2) @@ -81,7 +81,7 @@ class BTreeTests(unittest.TestCase): self.assertEqual(self.ns.get_refcount(leaf.id), 2) def test_shadow_returns_new_leaf_if_cannot_be_modified(self): - node = self.tree.new_leaf(['foo'], ['bar']) + node = self.tree._new_leaf(['foo'], ['bar']) self.tree.put_node(node) self.ns.set_refcount(node.id, 2) node2 = self.tree._shadow(node) @@ -102,7 +102,7 @@ class BTreeTests(unittest.TestCase): self.assertEqual(node2.id, node.id) def test_new_leaf_does_not_put_node_into_store(self): - leaf = self.tree.new_leaf([], []) + leaf = self.tree._new_leaf([], []) self.assertRaises(larch.NodeMissing, self.tree.get_node, leaf.id) def test_new_index_does_not_put_node_into_store(self): @@ -110,7 +110,7 @@ class BTreeTests(unittest.TestCase): self.assertRaises(larch.NodeMissing, self.tree.get_node, index.id) def test_new_index_increments_childrens_refcounts(self): - leaf = self.tree.new_leaf([], []) + leaf = self.tree._new_leaf([], []) self.tree.put_node(leaf) self.assertEqual(self.ns.get_refcount(leaf.id), 0) self.tree.new_index(['foo'], [leaf.id]) @@ -331,9 +331,9 @@ class BTreeTests(unittest.TestCase): def test_last_node_id_persists(self): self.tree.insert('foo', 'bar') # make tree has root - node1 = self.tree.new_leaf([], []) + node1 = self.tree._new_leaf([], []) tree2 = larch.BTree(self.forest, self.ns, self.tree.root.id) - node2 = tree2.new_leaf([], []) + node2 = tree2._new_leaf([], []) self.assertEqual(node1.id + 1, node2.id) def test_lookup_range_returns_empty_list_if_nothing_found(self): |