summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2011-06-06 15:23:23 +0100
committerLars Wirzenius <liw@liw.fi>2011-06-06 15:23:23 +0100
commitfa58a97e58aeba1ce40a7bb701df25ce8ee084ac (patch)
tree9db66ef5f79c41186fb246ec74d668c34ff42699
parent1685376963e33353342eeb697aa68c5b17f6f06d (diff)
downloadlarch-fa58a97e58aeba1ce40a7bb701df25ce8ee084ac.tar.gz
Documentation of BTree class.
-rw-r--r--doc/index.rst2
-rw-r--r--larch/forest_tests.py4
-rw-r--r--larch/tree.py53
-rw-r--r--larch/tree_tests.py12
4 files changed, 37 insertions, 34 deletions
diff --git a/doc/index.rst b/doc/index.rst
index fd39563..e8799aa 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -29,7 +29,7 @@ only possible within a forest. Thus, also, only trees in the same
forest can share nodes. All **keys** in a all trees in a forest
must be string of the same size. **Values** are strings and
are stored in **nodes**, and
-can be of any size, almost up to the size of a node.
+can be of any size, almost up to half the size of a node.
When creating a forest, you must specify the sizes of keys and
nodes, and the directory in which everything gets stored::
diff --git a/larch/forest_tests.py b/larch/forest_tests.py
index e1ab67c..585d546 100644
--- a/larch/forest_tests.py
+++ b/larch/forest_tests.py
@@ -55,8 +55,8 @@ class ForestTests(unittest.TestCase):
def test_clones_do_not_clash_in_new_node_ids(self):
t1 = self.forest.new_tree()
t2 = self.forest.new_tree(t1)
- node1 = t1.new_leaf([], [])
- node2 = t2.new_leaf([], [])
+ node1 = t1._new_leaf([], [])
+ node2 = t2._new_leaf([], [])
self.assertEqual(node1.id + 1, node2.id)
def test_is_persistent(self):
diff --git a/larch/tree.py b/larch/tree.py
index 370b3ea..4bf7b72 100644
--- a/larch/tree.py
+++ b/larch/tree.py
@@ -51,12 +51,15 @@ class ValueTooLarge(Exception):
class BTree(object):
- '''B-tree.
+ '''A balanced search tree (copy-on-write B-tree).
- The nodes are stored in an external node store; see the NodeStore
- class. Key sizes are fixed, and given in bytes. Values may be of
- any size up to slightly less than half the maximum size of a node.
- See NodeStore.max_value_size for the exact value.
+ The tree belongs to a forest. The tree nodes are stored in an
+ external node store; see the ``NodeStore`` class.
+
+ ``root_id`` gives the id of the root node of the tree. The
+ root node must be unique to this tree, as it is modified in
+ place. ``root_id`` may also be ``None``, in which case a
+ new node is created automatically to serve as the root node.
'''
@@ -74,27 +77,27 @@ class BTree(object):
tracing.trace('init BTree %s with root_id %s' % (self, root_id))
- def check_key_size(self, key):
+ def _check_key_size(self, key):
if len(key) != self.node_store.codec.key_bytes:
raise KeySizeMismatch(key, self.node_store.codec.key_bytes)
- def check_value_size(self, value):
+ def _check_value_size(self, value):
if len(value) > self.node_store.max_value_size:
raise ValueTooLarge(value, self.node_store.max_value_size)
- def new_id(self):
+ def _new_id(self):
'''Generate a new node identifier.'''
return self.forest._new_id()
- def new_leaf(self, keys, values):
+ def _new_leaf(self, keys, values):
'''Create a new leaf node.'''
- node = larch.LeafNode(self.new_id(), keys, values)
+ node = larch.LeafNode(self._new_id(), keys, values)
tracing.trace('id=%s' % node.id)
return node
def new_index(self, keys, values):
'''Create a new index node.'''
- index = larch.IndexNode(self.new_id(), keys, values)
+ index = larch.IndexNode(self._new_id(), keys, values)
for child_id in values:
self.increment(child_id)
tracing.trace('id=%s' % index.id)
@@ -134,7 +137,7 @@ class BTree(object):
'''
- self.check_key_size(key)
+ self._check_key_size(key)
node = self.root
while isinstance(node, larch.IndexNode):
@@ -156,8 +159,8 @@ class BTree(object):
'''
- self.check_key_size(minkey)
- self.check_key_size(maxkey)
+ self._check_key_size(minkey)
+ self._check_key_size(maxkey)
if self.root is not None:
for pair in self._lookup_range(self.root.id, minkey, maxkey):
yield pair
@@ -182,8 +185,8 @@ class BTree(object):
'''
- self.check_key_size(minkey)
- self.check_key_size(maxkey)
+ self._check_key_size(minkey)
+ self._check_key_size(maxkey)
if self.root is None:
return True
return self._range_is_empty(self.root.id, minkey, maxkey)
@@ -208,7 +211,7 @@ class BTree(object):
elif isinstance(node, larch.IndexNode):
new = self.new_index(node.keys(), node.values())
else:
- new = self.new_leaf(node.keys(), node.values())
+ new = self._new_leaf(node.keys(), node.values())
new.size = node.size
return new
@@ -222,14 +225,14 @@ class BTree(object):
tracing.trace('key=%s' % repr(key))
tracing.trace('value=%s' % repr(value))
- self.check_key_size(key)
- self.check_value_size(value)
+ self._check_key_size(key)
+ self._check_value_size(value)
# Is the tree empty? This needs special casing to keep
# _insert_into_index simpler.
if self.root is None or len(self.root) == 0:
tracing.trace('tree is empty')
- leaf = self.new_leaf([key], [value])
+ leaf = self._new_leaf([key], [value])
self.put_node(leaf)
if self.root is None:
new_root = self.new_index([key], [leaf.id])
@@ -292,7 +295,7 @@ class BTree(object):
n = len(new_index) / 2
keys = new_index.keys()[n:]
values = new_index.values()[n:]
- new = larch.IndexNode(self.new_id(), keys, values)
+ new = larch.IndexNode(self._new_id(), keys, values)
tracing.trace('new index node id=%s' % new.id)
for k in keys:
new_index.remove(k)
@@ -327,7 +330,7 @@ class BTree(object):
values = new.values()
n = len(keys) / 2
- new2 = self.new_leaf(keys[n:], values[n:])
+ new2 = self._new_leaf(keys[n:], values[n:])
for key in new2:
new.remove(key)
assert size(new) > 0
@@ -363,7 +366,7 @@ class BTree(object):
'''
tracing.trace('key=%s' % repr(key))
- self.check_key_size(key)
+ self._check_key_size(key)
if self.root is None:
tracing.trace('no root')
@@ -486,8 +489,8 @@ class BTree(object):
'''
- self.check_key_size(minkey)
- self.check_key_size(maxkey)
+ self._check_key_size(minkey)
+ self._check_key_size(maxkey)
keys = [k for k, v in self.lookup_range(minkey, maxkey)]
for key in keys:
self.remove(key)
diff --git a/larch/tree_tests.py b/larch/tree_tests.py
index 4fe9b8e..04385fb 100644
--- a/larch/tree_tests.py
+++ b/larch/tree_tests.py
@@ -73,7 +73,7 @@ class BTreeTests(unittest.TestCase):
self.dump = False
def test_shadow_increments_childrens_refcounts(self):
- leaf = self.tree.new_leaf(['foo'], ['bar'])
+ leaf = self.tree._new_leaf(['foo'], ['bar'])
index = self.tree.new_index([leaf.first_key()], [leaf.id])
self.assertEqual(self.ns.get_refcount(leaf.id), 1)
self.ns.set_refcount(index.id, 2)
@@ -81,7 +81,7 @@ class BTreeTests(unittest.TestCase):
self.assertEqual(self.ns.get_refcount(leaf.id), 2)
def test_shadow_returns_new_leaf_if_cannot_be_modified(self):
- node = self.tree.new_leaf(['foo'], ['bar'])
+ node = self.tree._new_leaf(['foo'], ['bar'])
self.tree.put_node(node)
self.ns.set_refcount(node.id, 2)
node2 = self.tree._shadow(node)
@@ -102,7 +102,7 @@ class BTreeTests(unittest.TestCase):
self.assertEqual(node2.id, node.id)
def test_new_leaf_does_not_put_node_into_store(self):
- leaf = self.tree.new_leaf([], [])
+ leaf = self.tree._new_leaf([], [])
self.assertRaises(larch.NodeMissing, self.tree.get_node, leaf.id)
def test_new_index_does_not_put_node_into_store(self):
@@ -110,7 +110,7 @@ class BTreeTests(unittest.TestCase):
self.assertRaises(larch.NodeMissing, self.tree.get_node, index.id)
def test_new_index_increments_childrens_refcounts(self):
- leaf = self.tree.new_leaf([], [])
+ leaf = self.tree._new_leaf([], [])
self.tree.put_node(leaf)
self.assertEqual(self.ns.get_refcount(leaf.id), 0)
self.tree.new_index(['foo'], [leaf.id])
@@ -331,9 +331,9 @@ class BTreeTests(unittest.TestCase):
def test_last_node_id_persists(self):
self.tree.insert('foo', 'bar') # make tree has root
- node1 = self.tree.new_leaf([], [])
+ node1 = self.tree._new_leaf([], [])
tree2 = larch.BTree(self.forest, self.ns, self.tree.root.id)
- node2 = tree2.new_leaf([], [])
+ node2 = tree2._new_leaf([], [])
self.assertEqual(node1.id + 1, node2.id)
def test_lookup_range_returns_empty_list_if_nothing_found(self):