summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2010-12-30 20:18:03 +0000
committerLars Wirzenius <liw@liw.fi>2010-12-30 20:18:03 +0000
commit8e59339a2a7f8d697360dcc662187bd178c3babc (patch)
tree6b4b6a71352789e4da403c1cd7c7c4762728740f
parent26d30b8c228e005dcec03453b1592a5e93c625f0 (diff)
downloadlarch-8e59339a2a7f8d697360dcc662187bd178c3babc.tar.gz
Change Nodes to get keys and values as separate lists.
-rw-r--r--btree/codec.py12
-rw-r--r--btree/codec_tests.py18
-rw-r--r--btree/nodes.py11
-rw-r--r--btree/nodes_tests.py67
-rw-r--r--btree/nodestore.py16
-rw-r--r--btree/nodestore_disk_tests.py8
-rw-r--r--btree/tree.py11
-rw-r--r--btree/uploadqueue_tests.py14
-rwxr-xr-xcodec-speed18
9 files changed, 97 insertions, 78 deletions
diff --git a/btree/codec.py b/btree/codec.py
index 668b9b2..92cc9c3 100644
--- a/btree/codec.py
+++ b/btree/codec.py
@@ -57,7 +57,8 @@ class NodeCodec(object):
def encode_leaf(self, node):
'''Encode a leaf node as a byte string.'''
- keys, values = zip(*node.pairs()) or [(), ()]
+ keys = node.keys()
+ values = node.values()
return (self.leaf_header.pack('ORBL', node.id, len(keys)) +
''.join(keys) +
struct.pack('!%dI' % len(values), *map(len, values)) +
@@ -79,10 +80,9 @@ class NodeCodec(object):
offset = self.leaf_header.size + self.leaf_pair_fixed_size * num_pairs
append = values.append
for length in lengths:
- append(encoded[offset:offset + length])
- offset += length
- pairs = zip(keys, values)
- return btree.LeafNode(node_id, pairs)
+ append(encoded[offset:offset + length])
+ offset += length
+ return btree.LeafNode(node_id, keys, values)
def max_index_pairs(self, node_size):
'''Return number of index pairs that fit in a node of a given size.'''
@@ -115,7 +115,7 @@ class NodeCodec(object):
assert len(keys) == len(child_ids)
for x in child_ids:
assert type(x) == int
- return btree.IndexNode(node_id, zip(keys, child_ids))
+ return btree.IndexNode(node_id, keys, child_ids)
def encode(self, node):
if isinstance(node, btree.LeafNode):
diff --git a/btree/codec_tests.py b/btree/codec_tests.py
index 4855f7e..4d8fdfa 100644
--- a/btree/codec_tests.py
+++ b/btree/codec_tests.py
@@ -22,10 +22,8 @@ import btree
class NodeCodecTests(unittest.TestCase):
def setUp(self):
- self.leaf = btree.LeafNode(1234, [('foo', 'bar'), ('yoo', 'yoyo')])
- self.index = btree.IndexNode(5678,
- [('bar', 1234),
- ('foo', 7890)])
+ self.leaf = btree.LeafNode(1234, ['foo', 'yoo'], ['bar', 'yoyo'])
+ self.index = btree.IndexNode(5678, ['bar', 'foo'], [1234, 7890])
self.codec = btree.NodeCodec(3)
def test_returns_reasonable_size_for_empty_leaf(self):
@@ -35,20 +33,24 @@ class NodeCodecTests(unittest.TestCase):
self.assert_(self.codec.index_size([]) > 10)
def test_returns_reasonable_size_for_empty_leaf_generic(self):
- leaf = btree.LeafNode(0, [])
+ leaf = btree.LeafNode(0, [], [])
self.assert_(self.codec.size(leaf) > 10)
def test_returns_reasonable_size_for_empty_index_generic(self):
- index = btree.IndexNode(0, [])
+ index = btree.IndexNode(0, [], [])
self.assert_(self.codec.size(index) > 10)
def test_leaf_round_trip_ok(self):
encoded = self.codec.encode_leaf(self.leaf)
- self.assertEqual(self.codec.decode_leaf(encoded), self.leaf)
+ decoded = self.codec.decode_leaf(encoded)
+ self.assertEqual(decoded, self.leaf)
def test_index_round_trip_ok(self):
encoded = self.codec.encode_index(self.index)
- self.assertEqual(self.codec.decode_index(encoded), self.index)
+ decoded = self.codec.decode_index(encoded)
+ self.assertEqual(decoded.keys(), self.index.keys())
+ self.assertEqual(decoded.values(), self.index.values())
+ self.assertEqual(decoded, self.index)
def test_generic_round_trip_ok_for_leaf(self):
encoded = self.codec.encode(self.leaf)
diff --git a/btree/nodes.py b/btree/nodes.py
index d263c75..85b10b8 100644
--- a/btree/nodes.py
+++ b/btree/nodes.py
@@ -29,11 +29,12 @@ class Node(object):
'''
- def __init__(self, node_id, pairs=None):
- pairs = pairs or []
- self._keys = [k for k, v in pairs]
- self._values = [v for k, v in pairs]
- self._dict = dict(pairs)
+ def __init__(self, node_id, keys, values):
+ self._keys = list(keys)
+ self._values = list(values)
+ self._dict = dict()
+ for i in range(len(keys)):
+ self._dict[keys[i]] = values[i]
self.id = node_id
self.size = None
diff --git a/btree/nodes_tests.py b/btree/nodes_tests.py
index 3f92eef..fcfdb83 100644
--- a/btree/nodes_tests.py
+++ b/btree/nodes_tests.py
@@ -25,7 +25,9 @@ class NodeTests(unittest.TestCase):
self.node_id = 12765
self.pairs = [('key2', 'value2'), ('key1', 'value1')]
self.pairs.sort()
- self.node = btree.nodes.Node(self.node_id, self.pairs)
+ self.keys = [k for k, v in self.pairs]
+ self.values = [v for k, v in self.pairs]
+ self.node = btree.nodes.Node(self.node_id, self.keys, self.values)
def test_has_id(self):
self.assertEqual(self.node.id, self.node_id)
@@ -78,77 +80,79 @@ class NodeTests(unittest.TestCase):
[('key2', 'value2')])
def test_adds_key_value_pair_to_empty_node(self):
- node = btree.nodes.Node(0, [])
+ node = btree.nodes.Node(0, [], [])
node.add('foo', 'bar')
self.assertEqual(node.pairs(), [('foo', 'bar')])
self.assertEqual(node['foo'], 'bar')
def test_adds_key_value_pair_to_end_of_node_of_one_element(self):
- node = btree.nodes.Node(0, [('foo', 'bar')])
- node.add('foo2', 'bar')
- self.assertEqual(node.pairs(), [('foo', 'bar'), ('foo2', 'bar')])
- self.assertEqual(node['foo2'], 'bar')
+ node = btree.nodes.Node(0, ['foo'], ['bar'])
+ node.add('foo2', 'bar2')
+ self.assertEqual(node.keys(), ['foo', 'foo2'])
+ self.assertEqual(node.values(), ['bar', 'bar2'])
+ self.assertEqual(node['foo2'], 'bar2')
def test_adds_key_value_pair_to_beginning_of_node_of_one_element(self):
- node = btree.nodes.Node(0, [('foo', 'bar')])
+ node = btree.nodes.Node(0, ['foo'], ['bar'])
node.add('bar', 'bar')
- self.assertEqual(node.pairs(), [('bar', 'bar'), ('foo', 'bar')])
+ self.assertEqual(node.keys(), ['bar', 'foo'])
+ self.assertEqual(node.values(), ['bar', 'bar'])
self.assertEqual(node['bar'], 'bar')
def test_adds_key_value_pair_to_middle_of_node_of_two_elements(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar'])
node.add('duh', 'bar')
self.assertEqual(node.pairs(), [('bar', 'bar'), ('duh', 'bar'),
('foo', 'bar')])
self.assertEqual(node['duh'], 'bar')
def test_add_replaces_value_for_existing_key(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar'])
node.add('bar', 'xxx')
self.assertEqual(node.pairs(), [('bar', 'xxx'), ('foo', 'bar')])
self.assertEqual(node['bar'], 'xxx')
def test_add_resets_cached_size(self):
- node = btree.nodes.Node(0, [])
+ node = btree.nodes.Node(0, [], [])
node.size = 1234
node.add('foo', 'bar')
self.assertEqual(node.size, None)
def test_removes_first_key(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'),
- ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'duh', 'foo'],
+ ['bar', 'bar', 'bar'])
node.remove('bar')
self.assertEqual(node.pairs(), [('duh', 'bar'), ('foo', 'bar')])
self.assertRaises(KeyError, node.__getitem__, 'bar')
def test_removes_last_key(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'),
- ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'duh', 'foo'],
+ ['bar', 'bar', 'bar'])
node.remove('foo')
self.assertEqual(node.pairs(), [('bar', 'bar'), ('duh', 'bar')])
self.assertRaises(KeyError, node.__getitem__, 'foo')
def test_removes_middle_key(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'),
- ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'duh', 'foo'],
+ ['bar', 'bar', 'bar'])
node.remove('duh')
self.assertEqual(node.pairs(), [('bar', 'bar'), ('foo', 'bar')])
self.assertRaises(KeyError, node.__getitem__, 'duh')
def test_raises_exception_when_removing_unknown_key(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'),
- ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'duh', 'foo'],
+ ['bar', 'bar', 'bar'])
self.assertRaises(KeyError, node.remove, 'yo')
def test_remove_resets_cached_size(self):
- node = btree.nodes.Node(0, [('foo', 'bar')])
+ node = btree.nodes.Node(0, ['foo'], ['bar'])
node.size = 1234
node.remove('foo')
self.assertEqual(node.size, None)
def test_removes_index_range(self):
- node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'),
- ('foo', 'bar')])
+ node = btree.nodes.Node(0, ['bar', 'duh', 'foo'],
+ ['bar', 'bar', 'bar'])
node.size = 12375654
node.remove_index_range(1, 5)
self.assertEqual(node.pairs(), [('bar', 'bar')])
@@ -161,7 +165,7 @@ class NodeTests(unittest.TestCase):
bar = ('bar', 'bar')
foo = ('foo', 'foo')
- node = btree.LeafNode(0, [('bar', 'bar'), ('foo', 'foo')])
+ node = btree.LeafNode(0, ['bar', 'foo'], ['bar', 'foo'])
find = node.find_pairs
self.assertEqual(find('aaa', 'aaa'), [])
@@ -185,7 +189,7 @@ class NodeTests(unittest.TestCase):
self.assertEqual(find('ggg', 'ggg'), [])
def test_finds_no_potential_range_in_empty_node(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.assertEqual(node.find_potential_range('aaa', 'bbb'), (None, None))
def test_finds_potential_ranges(self):
@@ -193,7 +197,7 @@ class NodeTests(unittest.TestCase):
# every combination of minkey and maxkey being less than, equal,
# or greater than either child key (as long as minkey <= maxkey).
- node = btree.LeafNode(0, [('bar', 'bar'), ('foo', 'foo')])
+ node = btree.LeafNode(0, ['bar', 'foo'], ['bar', 'foo'])
find = node.find_potential_range
self.assertEqual(find('aaa', 'aaa'), (None, None))
@@ -223,12 +227,11 @@ class NodeTests(unittest.TestCase):
class IndexNodeTests(unittest.TestCase):
def setUp(self):
- self.leaf1 = btree.LeafNode(0, [('bar', 'bar')])
- self.leaf2 = btree.LeafNode(1, [('foo', 'foo')])
+ self.leaf1 = btree.LeafNode(0, ['bar'], ['bar'])
+ self.leaf2 = btree.LeafNode(1, ['foo'], ['foo'])
self.index_id = 1234
- self.index = btree.IndexNode(self.index_id,
- [('bar', self.leaf1.id),
- ('foo', self.leaf2.id)])
+ self.index = btree.IndexNode(self.index_id, ['bar', 'foo'],
+ [self.leaf1.id, self.leaf2.id])
def test_find_key_for_child_containing(self):
@@ -244,11 +247,11 @@ class IndexNodeTests(unittest.TestCase):
self.assertEqual(self.index.find_key_for_child_containing('a'), None)
def test_finds_no_key_when_node_is_empty(self):
- empty = btree.IndexNode(0, [])
+ empty = btree.IndexNode(0, [], [])
self.assertEqual(empty.find_key_for_child_containing('f00'), None)
def test_finds_no_children_in_range_when_empty(self):
- empty = btree.IndexNode(0, [])
+ empty = btree.IndexNode(0, [], [])
self.assertEqual(empty.find_children_in_range('bar', 'foo'), [])
def test_finds_children_in_ranges(self):
diff --git a/btree/nodestore.py b/btree/nodestore.py
index 34dd5e9..317a936 100644
--- a/btree/nodestore.py
+++ b/btree/nodestore.py
@@ -231,13 +231,13 @@ class NodeStoreTests(object): # pragma: no cover
self.assertEqual(self.ns.list_nodes(), [])
def test_puts_and_gets_same(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.push_upload_queue()
self.assertEqualNodes(self.ns.get_node(0), node)
def test_removes_node(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.push_upload_queue()
self.ns.remove_node(0)
@@ -245,31 +245,31 @@ class NodeStoreTests(object): # pragma: no cover
self.assertEqual(self.ns.list_nodes(), [])
def test_removes_node_from_upload_queue_if_one_exists(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.remove_node(0)
self.assertRaises(NodeMissing, self.ns.get_node, 0)
self.assertEqual(self.ns.list_nodes(), [])
def test_lists_node_zero(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.push_upload_queue()
node_ids = self.ns.list_nodes()
self.assertEqual(node_ids, [node.id])
def test_put_allows_to_overwrite_a_node(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
- node = btree.LeafNode(0, [('foo', 'bar')])
+ node = btree.LeafNode(0, ['foo'], ['bar'])
self.ns.put_node(node)
self.assertEqual(self.ns.get_node(0).pairs(), [('foo', 'bar')])
def test_put_allows_to_overwrite_a_node_after_upload_queue_push(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.push_upload_queue()
- node = btree.LeafNode(0, [('foo', 'bar')])
+ node = btree.LeafNode(0, ['foo'], ['bar'])
self.ns.put_node(node)
self.ns.push_upload_queue()
self.assertEqual(self.ns.get_node(0).pairs(), [('foo', 'bar')])
diff --git a/btree/nodestore_disk_tests.py b/btree/nodestore_disk_tests.py
index 73da192..f3a3f85 100644
--- a/btree/nodestore_disk_tests.py
+++ b/btree/nodestore_disk_tests.py
@@ -84,14 +84,14 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests):
self.assertEqual(ns2.get_refcount(0), 1234)
def test_put_refuses_too_large_a_node(self):
- node = btree.LeafNode(0, [('000', 'x' * (self.node_size + 1))])
+ node = btree.LeafNode(0, ['000'], ['x' * (self.node_size + 1)])
def helper(node):
self.ns.put_node(node)
self.ns.push_upload_queue()
self.assertRaises(btree.NodeTooBig, helper, node)
def test_puts_and_gets_same_with_cache_emptied(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.cache = lru.LRUCache(100)
self.assertEqualNodes(self.ns.get_node(0), node)
@@ -101,7 +101,7 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests):
self.ns.upload_queue.max = self.ns.upload_max
ids = range(self.ns.upload_max + 1)
for i in ids:
- node = btree.LeafNode(i, [])
+ node = btree.LeafNode(i, [], [])
self.ns.put_node(node)
self.assertEqual(sorted(self.ns.list_nodes()), ids)
for node_id in ids:
@@ -109,7 +109,7 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests):
self.assertEqual(self.ns.get_node(node_id).id, node_id)
def test_gets_node_from_disk(self):
- node = btree.LeafNode(0, [])
+ node = btree.LeafNode(0, [], [])
self.ns.put_node(node)
self.ns.push_upload_queue()
ns2 = self.new_ns()
diff --git a/btree/tree.py b/btree/tree.py
index cb4692c..1386390 100644
--- a/btree/tree.py
+++ b/btree/tree.py
@@ -74,13 +74,15 @@ class BTree(object):
def new_leaf(self, pairs):
'''Create a new leaf node and keep track of it.'''
- leaf = btree.LeafNode(self.new_id(), pairs)
+ leaf = btree.LeafNode(self.new_id(), [k for k, v in pairs],
+ [v for k, v in pairs]) # FIXME
self.node_store.put_node(leaf)
return leaf
def new_index(self, pairs):
'''Create a new index node and keep track of it.'''
- index = btree.IndexNode(self.new_id(), pairs)
+ index = btree.IndexNode(self.new_id(), [k for k, v in pairs],
+ [v for k, v in pairs]) # FIXME
self.node_store.put_node(index)
for key, child_id in pairs:
self.increment(child_id)
@@ -203,7 +205,7 @@ class BTree(object):
# Is the tree empty? This needs special casing to keep
# _insert_into_index simpler.
if self.root is None or len(self.root) == 0:
- leaf = btree.LeafNode(self.new_id(), [(key, value)])
+ leaf = btree.LeafNode(self.new_id(), [key], [value])
self.put_node(leaf)
if self.root is None:
self.new_root([(key, leaf.id)])
@@ -254,7 +256,8 @@ class BTree(object):
if len(new_index) > self.max_index_length:
n = len(new_index) / 2
pairs = new_index.pairs()[n:]
- new = btree.IndexNode(self.new_id(), pairs)
+ new = btree.IndexNode(self.new_id(), [k for k,v in pairs],
+ [v for k,v in pairs]) # FIXME
for k, v in pairs:
new_index.remove(k)
self.put_node(new_index)
diff --git a/btree/uploadqueue_tests.py b/btree/uploadqueue_tests.py
index 4a6ac8e..3ed3c57 100644
--- a/btree/uploadqueue_tests.py
+++ b/btree/uploadqueue_tests.py
@@ -30,7 +30,7 @@ class UploadQueueTests(unittest.TestCase):
self.max_queue = 2
self.nodes = []
self.uq = btree.UploadQueue(self.really_put, self.max_queue)
- self.node = btree.LeafNode(1, [])
+ self.node = btree.LeafNode(1, [], [])
def really_put(self, node):
self.nodes.append(node)
@@ -47,7 +47,7 @@ class UploadQueueTests(unittest.TestCase):
self.assertEqual(self.uq.get(self.node.id), self.node)
def test_put_replaces_existing_node(self):
- node2 = btree.LeafNode(1, [('foo', 'bar')])
+ node2 = btree.LeafNode(1, ['foo'], ['bar'])
self.uq.put(self.node)
self.uq.put(node2)
self.assertEqual(self.uq.get(self.node.id), node2)
@@ -67,20 +67,20 @@ class UploadQueueTests(unittest.TestCase):
def test_does_not_push_second_node(self):
self.uq.put(self.node)
- self.uq.put(btree.LeafNode(2, []))
+ self.uq.put(btree.LeafNode(2, [], []))
self.assertEqual(self.nodes, [])
def test_pushes_first_node_after_third_is_pushed(self):
self.uq.put(self.node)
- self.uq.put(btree.LeafNode(2, []))
- self.uq.put(btree.LeafNode(3, []))
+ self.uq.put(btree.LeafNode(2, [], []))
+ self.uq.put(btree.LeafNode(3, [], []))
self.assertEqual(self.nodes, [self.node])
def test_pushes_oldest_even_if_recently_used(self):
self.uq.put(self.node)
- self.uq.put(btree.LeafNode(2, []))
+ self.uq.put(btree.LeafNode(2, [], []))
self.uq.get(self.node.id)
- self.uq.put(btree.LeafNode(3, []))
+ self.uq.put(btree.LeafNode(3, [], []))
self.assertEqual(self.nodes, [self.node])
def test_pushes_out_only_node_when_requested(self):
diff --git a/codec-speed b/codec-speed
index fa3ccf9..fd9ee74 100755
--- a/codec-speed
+++ b/codec-speed
@@ -19,6 +19,8 @@ echo -n "leaf_size "
python -m timeit \
-s 'import btree' \
-s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \
+ -s 'keys = [k for k, v in pairs]' \
+ -s 'values = [v for k, v in pairs]' \
-s 'codec = btree.NodeCodec(19)' \
'codec.leaf_size(pairs)'
@@ -26,7 +28,9 @@ echo -n "encode_leaf "
python -m timeit \
-s 'import btree' \
-s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \
- -s 'node = btree.LeafNode(42, pairs)' \
+ -s 'keys = [k for k, v in pairs]' \
+ -s 'values = [v for k, v in pairs]' \
+ -s 'node = btree.LeafNode(42, keys, values)' \
-s 'codec = btree.NodeCodec(19)' \
'codec.encode_leaf(node)'
@@ -34,7 +38,9 @@ echo -n "decode leaf "
python -m timeit \
-s 'import btree' \
-s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \
- -s 'node = btree.LeafNode(42, pairs)' \
+ -s 'keys = [k for k, v in pairs]' \
+ -s 'values = [v for k, v in pairs]' \
+ -s 'node = btree.LeafNode(42, keys, values)' \
-s 'codec = btree.NodeCodec(19)' \
-s 'encoded = codec.encode_leaf(node)' \
'codec.decode(encoded)'
@@ -43,7 +49,9 @@ echo -n "encode_index "
python -m timeit \
-s 'import btree' \
-s 'pairs = [("%019d" % i, i) for i in range(1000)]' \
- -s 'node = btree.IndexNode(42, pairs)' \
+ -s 'keys = [k for k, v in pairs]' \
+ -s 'values = [v for k, v in pairs]' \
+ -s 'node = btree.IndexNode(42, keys, values)' \
-s 'codec = btree.NodeCodec(19)' \
'codec.encode_index(node)'
@@ -51,7 +59,9 @@ echo -n "decode index "
python -m timeit \
-s 'import btree' \
-s 'pairs = [("%019d" % i, i) for i in range(1000)]' \
- -s 'node = btree.IndexNode(42, pairs)' \
+ -s 'keys = [k for k, v in pairs]' \
+ -s 'values = [v for k, v in pairs]' \
+ -s 'node = btree.IndexNode(42, keys, values)' \
-s 'codec = btree.NodeCodec(19)' \
-s 'encoded = codec.encode_index(node)' \
'codec.decode(encoded)'