diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-12-30 20:18:03 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-12-30 20:18:03 +0000 |
commit | 8e59339a2a7f8d697360dcc662187bd178c3babc (patch) | |
tree | 6b4b6a71352789e4da403c1cd7c7c4762728740f | |
parent | 26d30b8c228e005dcec03453b1592a5e93c625f0 (diff) | |
download | larch-8e59339a2a7f8d697360dcc662187bd178c3babc.tar.gz |
Change Nodes to get keys and values as separate lists.
-rw-r--r-- | btree/codec.py | 12 | ||||
-rw-r--r-- | btree/codec_tests.py | 18 | ||||
-rw-r--r-- | btree/nodes.py | 11 | ||||
-rw-r--r-- | btree/nodes_tests.py | 67 | ||||
-rw-r--r-- | btree/nodestore.py | 16 | ||||
-rw-r--r-- | btree/nodestore_disk_tests.py | 8 | ||||
-rw-r--r-- | btree/tree.py | 11 | ||||
-rw-r--r-- | btree/uploadqueue_tests.py | 14 | ||||
-rwxr-xr-x | codec-speed | 18 |
9 files changed, 97 insertions, 78 deletions
diff --git a/btree/codec.py b/btree/codec.py index 668b9b2..92cc9c3 100644 --- a/btree/codec.py +++ b/btree/codec.py @@ -57,7 +57,8 @@ class NodeCodec(object): def encode_leaf(self, node): '''Encode a leaf node as a byte string.''' - keys, values = zip(*node.pairs()) or [(), ()] + keys = node.keys() + values = node.values() return (self.leaf_header.pack('ORBL', node.id, len(keys)) + ''.join(keys) + struct.pack('!%dI' % len(values), *map(len, values)) + @@ -79,10 +80,9 @@ class NodeCodec(object): offset = self.leaf_header.size + self.leaf_pair_fixed_size * num_pairs append = values.append for length in lengths: - append(encoded[offset:offset + length]) - offset += length - pairs = zip(keys, values) - return btree.LeafNode(node_id, pairs) + append(encoded[offset:offset + length]) + offset += length + return btree.LeafNode(node_id, keys, values) def max_index_pairs(self, node_size): '''Return number of index pairs that fit in a node of a given size.''' @@ -115,7 +115,7 @@ class NodeCodec(object): assert len(keys) == len(child_ids) for x in child_ids: assert type(x) == int - return btree.IndexNode(node_id, zip(keys, child_ids)) + return btree.IndexNode(node_id, keys, child_ids) def encode(self, node): if isinstance(node, btree.LeafNode): diff --git a/btree/codec_tests.py b/btree/codec_tests.py index 4855f7e..4d8fdfa 100644 --- a/btree/codec_tests.py +++ b/btree/codec_tests.py @@ -22,10 +22,8 @@ import btree class NodeCodecTests(unittest.TestCase): def setUp(self): - self.leaf = btree.LeafNode(1234, [('foo', 'bar'), ('yoo', 'yoyo')]) - self.index = btree.IndexNode(5678, - [('bar', 1234), - ('foo', 7890)]) + self.leaf = btree.LeafNode(1234, ['foo', 'yoo'], ['bar', 'yoyo']) + self.index = btree.IndexNode(5678, ['bar', 'foo'], [1234, 7890]) self.codec = btree.NodeCodec(3) def test_returns_reasonable_size_for_empty_leaf(self): @@ -35,20 +33,24 @@ class NodeCodecTests(unittest.TestCase): self.assert_(self.codec.index_size([]) > 10) def test_returns_reasonable_size_for_empty_leaf_generic(self): - leaf = btree.LeafNode(0, []) + leaf = btree.LeafNode(0, [], []) self.assert_(self.codec.size(leaf) > 10) def test_returns_reasonable_size_for_empty_index_generic(self): - index = btree.IndexNode(0, []) + index = btree.IndexNode(0, [], []) self.assert_(self.codec.size(index) > 10) def test_leaf_round_trip_ok(self): encoded = self.codec.encode_leaf(self.leaf) - self.assertEqual(self.codec.decode_leaf(encoded), self.leaf) + decoded = self.codec.decode_leaf(encoded) + self.assertEqual(decoded, self.leaf) def test_index_round_trip_ok(self): encoded = self.codec.encode_index(self.index) - self.assertEqual(self.codec.decode_index(encoded), self.index) + decoded = self.codec.decode_index(encoded) + self.assertEqual(decoded.keys(), self.index.keys()) + self.assertEqual(decoded.values(), self.index.values()) + self.assertEqual(decoded, self.index) def test_generic_round_trip_ok_for_leaf(self): encoded = self.codec.encode(self.leaf) diff --git a/btree/nodes.py b/btree/nodes.py index d263c75..85b10b8 100644 --- a/btree/nodes.py +++ b/btree/nodes.py @@ -29,11 +29,12 @@ class Node(object): ''' - def __init__(self, node_id, pairs=None): - pairs = pairs or [] - self._keys = [k for k, v in pairs] - self._values = [v for k, v in pairs] - self._dict = dict(pairs) + def __init__(self, node_id, keys, values): + self._keys = list(keys) + self._values = list(values) + self._dict = dict() + for i in range(len(keys)): + self._dict[keys[i]] = values[i] self.id = node_id self.size = None diff --git a/btree/nodes_tests.py b/btree/nodes_tests.py index 3f92eef..fcfdb83 100644 --- a/btree/nodes_tests.py +++ b/btree/nodes_tests.py @@ -25,7 +25,9 @@ class NodeTests(unittest.TestCase): self.node_id = 12765 self.pairs = [('key2', 'value2'), ('key1', 'value1')] self.pairs.sort() - self.node = btree.nodes.Node(self.node_id, self.pairs) + self.keys = [k for k, v in self.pairs] + self.values = [v for k, v in self.pairs] + self.node = btree.nodes.Node(self.node_id, self.keys, self.values) def test_has_id(self): self.assertEqual(self.node.id, self.node_id) @@ -78,77 +80,79 @@ class NodeTests(unittest.TestCase): [('key2', 'value2')]) def test_adds_key_value_pair_to_empty_node(self): - node = btree.nodes.Node(0, []) + node = btree.nodes.Node(0, [], []) node.add('foo', 'bar') self.assertEqual(node.pairs(), [('foo', 'bar')]) self.assertEqual(node['foo'], 'bar') def test_adds_key_value_pair_to_end_of_node_of_one_element(self): - node = btree.nodes.Node(0, [('foo', 'bar')]) - node.add('foo2', 'bar') - self.assertEqual(node.pairs(), [('foo', 'bar'), ('foo2', 'bar')]) - self.assertEqual(node['foo2'], 'bar') + node = btree.nodes.Node(0, ['foo'], ['bar']) + node.add('foo2', 'bar2') + self.assertEqual(node.keys(), ['foo', 'foo2']) + self.assertEqual(node.values(), ['bar', 'bar2']) + self.assertEqual(node['foo2'], 'bar2') def test_adds_key_value_pair_to_beginning_of_node_of_one_element(self): - node = btree.nodes.Node(0, [('foo', 'bar')]) + node = btree.nodes.Node(0, ['foo'], ['bar']) node.add('bar', 'bar') - self.assertEqual(node.pairs(), [('bar', 'bar'), ('foo', 'bar')]) + self.assertEqual(node.keys(), ['bar', 'foo']) + self.assertEqual(node.values(), ['bar', 'bar']) self.assertEqual(node['bar'], 'bar') def test_adds_key_value_pair_to_middle_of_node_of_two_elements(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar']) node.add('duh', 'bar') self.assertEqual(node.pairs(), [('bar', 'bar'), ('duh', 'bar'), ('foo', 'bar')]) self.assertEqual(node['duh'], 'bar') def test_add_replaces_value_for_existing_key(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar']) node.add('bar', 'xxx') self.assertEqual(node.pairs(), [('bar', 'xxx'), ('foo', 'bar')]) self.assertEqual(node['bar'], 'xxx') def test_add_resets_cached_size(self): - node = btree.nodes.Node(0, []) + node = btree.nodes.Node(0, [], []) node.size = 1234 node.add('foo', 'bar') self.assertEqual(node.size, None) def test_removes_first_key(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'), - ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'duh', 'foo'], + ['bar', 'bar', 'bar']) node.remove('bar') self.assertEqual(node.pairs(), [('duh', 'bar'), ('foo', 'bar')]) self.assertRaises(KeyError, node.__getitem__, 'bar') def test_removes_last_key(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'), - ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'duh', 'foo'], + ['bar', 'bar', 'bar']) node.remove('foo') self.assertEqual(node.pairs(), [('bar', 'bar'), ('duh', 'bar')]) self.assertRaises(KeyError, node.__getitem__, 'foo') def test_removes_middle_key(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'), - ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'duh', 'foo'], + ['bar', 'bar', 'bar']) node.remove('duh') self.assertEqual(node.pairs(), [('bar', 'bar'), ('foo', 'bar')]) self.assertRaises(KeyError, node.__getitem__, 'duh') def test_raises_exception_when_removing_unknown_key(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'), - ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'duh', 'foo'], + ['bar', 'bar', 'bar']) self.assertRaises(KeyError, node.remove, 'yo') def test_remove_resets_cached_size(self): - node = btree.nodes.Node(0, [('foo', 'bar')]) + node = btree.nodes.Node(0, ['foo'], ['bar']) node.size = 1234 node.remove('foo') self.assertEqual(node.size, None) def test_removes_index_range(self): - node = btree.nodes.Node(0, [('bar', 'bar'), ('duh', 'bar'), - ('foo', 'bar')]) + node = btree.nodes.Node(0, ['bar', 'duh', 'foo'], + ['bar', 'bar', 'bar']) node.size = 12375654 node.remove_index_range(1, 5) self.assertEqual(node.pairs(), [('bar', 'bar')]) @@ -161,7 +165,7 @@ class NodeTests(unittest.TestCase): bar = ('bar', 'bar') foo = ('foo', 'foo') - node = btree.LeafNode(0, [('bar', 'bar'), ('foo', 'foo')]) + node = btree.LeafNode(0, ['bar', 'foo'], ['bar', 'foo']) find = node.find_pairs self.assertEqual(find('aaa', 'aaa'), []) @@ -185,7 +189,7 @@ class NodeTests(unittest.TestCase): self.assertEqual(find('ggg', 'ggg'), []) def test_finds_no_potential_range_in_empty_node(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.assertEqual(node.find_potential_range('aaa', 'bbb'), (None, None)) def test_finds_potential_ranges(self): @@ -193,7 +197,7 @@ class NodeTests(unittest.TestCase): # every combination of minkey and maxkey being less than, equal, # or greater than either child key (as long as minkey <= maxkey). - node = btree.LeafNode(0, [('bar', 'bar'), ('foo', 'foo')]) + node = btree.LeafNode(0, ['bar', 'foo'], ['bar', 'foo']) find = node.find_potential_range self.assertEqual(find('aaa', 'aaa'), (None, None)) @@ -223,12 +227,11 @@ class NodeTests(unittest.TestCase): class IndexNodeTests(unittest.TestCase): def setUp(self): - self.leaf1 = btree.LeafNode(0, [('bar', 'bar')]) - self.leaf2 = btree.LeafNode(1, [('foo', 'foo')]) + self.leaf1 = btree.LeafNode(0, ['bar'], ['bar']) + self.leaf2 = btree.LeafNode(1, ['foo'], ['foo']) self.index_id = 1234 - self.index = btree.IndexNode(self.index_id, - [('bar', self.leaf1.id), - ('foo', self.leaf2.id)]) + self.index = btree.IndexNode(self.index_id, ['bar', 'foo'], + [self.leaf1.id, self.leaf2.id]) def test_find_key_for_child_containing(self): @@ -244,11 +247,11 @@ class IndexNodeTests(unittest.TestCase): self.assertEqual(self.index.find_key_for_child_containing('a'), None) def test_finds_no_key_when_node_is_empty(self): - empty = btree.IndexNode(0, []) + empty = btree.IndexNode(0, [], []) self.assertEqual(empty.find_key_for_child_containing('f00'), None) def test_finds_no_children_in_range_when_empty(self): - empty = btree.IndexNode(0, []) + empty = btree.IndexNode(0, [], []) self.assertEqual(empty.find_children_in_range('bar', 'foo'), []) def test_finds_children_in_ranges(self): diff --git a/btree/nodestore.py b/btree/nodestore.py index 34dd5e9..317a936 100644 --- a/btree/nodestore.py +++ b/btree/nodestore.py @@ -231,13 +231,13 @@ class NodeStoreTests(object): # pragma: no cover self.assertEqual(self.ns.list_nodes(), []) def test_puts_and_gets_same(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.push_upload_queue() self.assertEqualNodes(self.ns.get_node(0), node) def test_removes_node(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.push_upload_queue() self.ns.remove_node(0) @@ -245,31 +245,31 @@ class NodeStoreTests(object): # pragma: no cover self.assertEqual(self.ns.list_nodes(), []) def test_removes_node_from_upload_queue_if_one_exists(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.remove_node(0) self.assertRaises(NodeMissing, self.ns.get_node, 0) self.assertEqual(self.ns.list_nodes(), []) def test_lists_node_zero(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.push_upload_queue() node_ids = self.ns.list_nodes() self.assertEqual(node_ids, [node.id]) def test_put_allows_to_overwrite_a_node(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) - node = btree.LeafNode(0, [('foo', 'bar')]) + node = btree.LeafNode(0, ['foo'], ['bar']) self.ns.put_node(node) self.assertEqual(self.ns.get_node(0).pairs(), [('foo', 'bar')]) def test_put_allows_to_overwrite_a_node_after_upload_queue_push(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.push_upload_queue() - node = btree.LeafNode(0, [('foo', 'bar')]) + node = btree.LeafNode(0, ['foo'], ['bar']) self.ns.put_node(node) self.ns.push_upload_queue() self.assertEqual(self.ns.get_node(0).pairs(), [('foo', 'bar')]) diff --git a/btree/nodestore_disk_tests.py b/btree/nodestore_disk_tests.py index 73da192..f3a3f85 100644 --- a/btree/nodestore_disk_tests.py +++ b/btree/nodestore_disk_tests.py @@ -84,14 +84,14 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests): self.assertEqual(ns2.get_refcount(0), 1234) def test_put_refuses_too_large_a_node(self): - node = btree.LeafNode(0, [('000', 'x' * (self.node_size + 1))]) + node = btree.LeafNode(0, ['000'], ['x' * (self.node_size + 1)]) def helper(node): self.ns.put_node(node) self.ns.push_upload_queue() self.assertRaises(btree.NodeTooBig, helper, node) def test_puts_and_gets_same_with_cache_emptied(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.cache = lru.LRUCache(100) self.assertEqualNodes(self.ns.get_node(0), node) @@ -101,7 +101,7 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests): self.ns.upload_queue.max = self.ns.upload_max ids = range(self.ns.upload_max + 1) for i in ids: - node = btree.LeafNode(i, []) + node = btree.LeafNode(i, [], []) self.ns.put_node(node) self.assertEqual(sorted(self.ns.list_nodes()), ids) for node_id in ids: @@ -109,7 +109,7 @@ class NodeStoreDiskTests(unittest.TestCase, btree.NodeStoreTests): self.assertEqual(self.ns.get_node(node_id).id, node_id) def test_gets_node_from_disk(self): - node = btree.LeafNode(0, []) + node = btree.LeafNode(0, [], []) self.ns.put_node(node) self.ns.push_upload_queue() ns2 = self.new_ns() diff --git a/btree/tree.py b/btree/tree.py index cb4692c..1386390 100644 --- a/btree/tree.py +++ b/btree/tree.py @@ -74,13 +74,15 @@ class BTree(object): def new_leaf(self, pairs): '''Create a new leaf node and keep track of it.''' - leaf = btree.LeafNode(self.new_id(), pairs) + leaf = btree.LeafNode(self.new_id(), [k for k, v in pairs], + [v for k, v in pairs]) # FIXME self.node_store.put_node(leaf) return leaf def new_index(self, pairs): '''Create a new index node and keep track of it.''' - index = btree.IndexNode(self.new_id(), pairs) + index = btree.IndexNode(self.new_id(), [k for k, v in pairs], + [v for k, v in pairs]) # FIXME self.node_store.put_node(index) for key, child_id in pairs: self.increment(child_id) @@ -203,7 +205,7 @@ class BTree(object): # Is the tree empty? This needs special casing to keep # _insert_into_index simpler. if self.root is None or len(self.root) == 0: - leaf = btree.LeafNode(self.new_id(), [(key, value)]) + leaf = btree.LeafNode(self.new_id(), [key], [value]) self.put_node(leaf) if self.root is None: self.new_root([(key, leaf.id)]) @@ -254,7 +256,8 @@ class BTree(object): if len(new_index) > self.max_index_length: n = len(new_index) / 2 pairs = new_index.pairs()[n:] - new = btree.IndexNode(self.new_id(), pairs) + new = btree.IndexNode(self.new_id(), [k for k,v in pairs], + [v for k,v in pairs]) # FIXME for k, v in pairs: new_index.remove(k) self.put_node(new_index) diff --git a/btree/uploadqueue_tests.py b/btree/uploadqueue_tests.py index 4a6ac8e..3ed3c57 100644 --- a/btree/uploadqueue_tests.py +++ b/btree/uploadqueue_tests.py @@ -30,7 +30,7 @@ class UploadQueueTests(unittest.TestCase): self.max_queue = 2 self.nodes = [] self.uq = btree.UploadQueue(self.really_put, self.max_queue) - self.node = btree.LeafNode(1, []) + self.node = btree.LeafNode(1, [], []) def really_put(self, node): self.nodes.append(node) @@ -47,7 +47,7 @@ class UploadQueueTests(unittest.TestCase): self.assertEqual(self.uq.get(self.node.id), self.node) def test_put_replaces_existing_node(self): - node2 = btree.LeafNode(1, [('foo', 'bar')]) + node2 = btree.LeafNode(1, ['foo'], ['bar']) self.uq.put(self.node) self.uq.put(node2) self.assertEqual(self.uq.get(self.node.id), node2) @@ -67,20 +67,20 @@ class UploadQueueTests(unittest.TestCase): def test_does_not_push_second_node(self): self.uq.put(self.node) - self.uq.put(btree.LeafNode(2, [])) + self.uq.put(btree.LeafNode(2, [], [])) self.assertEqual(self.nodes, []) def test_pushes_first_node_after_third_is_pushed(self): self.uq.put(self.node) - self.uq.put(btree.LeafNode(2, [])) - self.uq.put(btree.LeafNode(3, [])) + self.uq.put(btree.LeafNode(2, [], [])) + self.uq.put(btree.LeafNode(3, [], [])) self.assertEqual(self.nodes, [self.node]) def test_pushes_oldest_even_if_recently_used(self): self.uq.put(self.node) - self.uq.put(btree.LeafNode(2, [])) + self.uq.put(btree.LeafNode(2, [], [])) self.uq.get(self.node.id) - self.uq.put(btree.LeafNode(3, [])) + self.uq.put(btree.LeafNode(3, [], [])) self.assertEqual(self.nodes, [self.node]) def test_pushes_out_only_node_when_requested(self): diff --git a/codec-speed b/codec-speed index fa3ccf9..fd9ee74 100755 --- a/codec-speed +++ b/codec-speed @@ -19,6 +19,8 @@ echo -n "leaf_size " python -m timeit \ -s 'import btree' \ -s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \ + -s 'keys = [k for k, v in pairs]' \ + -s 'values = [v for k, v in pairs]' \ -s 'codec = btree.NodeCodec(19)' \ 'codec.leaf_size(pairs)' @@ -26,7 +28,9 @@ echo -n "encode_leaf " python -m timeit \ -s 'import btree' \ -s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \ - -s 'node = btree.LeafNode(42, pairs)' \ + -s 'keys = [k for k, v in pairs]' \ + -s 'values = [v for k, v in pairs]' \ + -s 'node = btree.LeafNode(42, keys, values)' \ -s 'codec = btree.NodeCodec(19)' \ 'codec.encode_leaf(node)' @@ -34,7 +38,9 @@ echo -n "decode leaf " python -m timeit \ -s 'import btree' \ -s 'pairs = [("%019d" % i, "%032d" % i) for i in range(1000)]' \ - -s 'node = btree.LeafNode(42, pairs)' \ + -s 'keys = [k for k, v in pairs]' \ + -s 'values = [v for k, v in pairs]' \ + -s 'node = btree.LeafNode(42, keys, values)' \ -s 'codec = btree.NodeCodec(19)' \ -s 'encoded = codec.encode_leaf(node)' \ 'codec.decode(encoded)' @@ -43,7 +49,9 @@ echo -n "encode_index " python -m timeit \ -s 'import btree' \ -s 'pairs = [("%019d" % i, i) for i in range(1000)]' \ - -s 'node = btree.IndexNode(42, pairs)' \ + -s 'keys = [k for k, v in pairs]' \ + -s 'values = [v for k, v in pairs]' \ + -s 'node = btree.IndexNode(42, keys, values)' \ -s 'codec = btree.NodeCodec(19)' \ 'codec.encode_index(node)' @@ -51,7 +59,9 @@ echo -n "decode index " python -m timeit \ -s 'import btree' \ -s 'pairs = [("%019d" % i, i) for i in range(1000)]' \ - -s 'node = btree.IndexNode(42, pairs)' \ + -s 'keys = [k for k, v in pairs]' \ + -s 'values = [v for k, v in pairs]' \ + -s 'node = btree.IndexNode(42, keys, values)' \ -s 'codec = btree.NodeCodec(19)' \ -s 'encoded = codec.encode_index(node)' \ 'codec.decode(encoded)' |