summaryrefslogtreecommitdiff
path: root/insert-remove-test
blob: dd6cecf7f4e15af312275c7e916d949330f29bc4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/python
# Copyright 2010  Lars Wirzenius
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Excercise my B-tree implementation, for simple benchmarking purposes.
# The benchmark gets a location and an operation count as command line
# arguments.
#
# If the location is the empty string, an in-memory node store is used.
# Otherwise it must be a non-existent directory name.
#
# The benchmark will do the given number of insertions into the tree, and
# do_it the speed of that. Then it will look up each of those, and do_it
# the lookups.


import cProfile
import logging
import os
import random
import shutil
import sys
import time

import larch


def do_it(keys, func, final):
    start = time.clock()
    for key in keys:
        func(key)
    final()
    end = time.clock()
    return end - start


def assert_refcounts_are_one(tree):
    def helper(node_id):
        refcount = tree.node_store.rs.get_refcount(node_id)
        node = tree._get_node(node_id)
        assert refcount == 1, 'type=%s id=%d refcount=%d' % (repr(node), 
                                                             node_id, refcount)
        if isinstance(node, larch.IndexNode):
            for child_id in node.values():
                helper(child_id)
    helper(tree.root.id)


def do_insert(tree, key, value):
    logging.debug('do_insert(%s)' % (repr(key)))
    if tree.root is not None:
        assert_refcounts_are_one(tree)
    tree.insert(key, value)
    assert_refcounts_are_one(tree)


def do_remove(tree, key):
    logging.debug('do_remove(%s)' % (repr(key)))
    assert_refcounts_are_one(tree)
    tree.remove(key)
    assert_refcounts_are_one(tree)


def main():
    if True:
        import logging
        import tracing
        tracing.trace_add_pattern('tree')
        logging.basicConfig(filename='larch.log', level=logging.DEBUG)

    location = sys.argv[1]
    n = int(sys.argv[2])

    key_size = 19
    value_size = 128
    node_size = 300

    codec = larch.NodeCodec(key_size)

    if location == '':
        ns = larch.NodeStoreMemory(node_size, codec)
    else:
        if os.path.exists(location):
            raise Exception('%s exists already' % location)
        os.mkdir(location)
        ns = larch.NodeStoreDisk(True, node_size, codec, dirname=location)

    forest = larch.Forest(ns)
    tree = forest.new_tree()
    logging.debug('min keys: %d' % tree.min_index_length)
    logging.debug('max keys: %d' % tree.max_index_length)
    
    # Create list of keys.
    keys = ['%0*d' % (key_size, i) for i in xrange(n)]
    
    # Do inserts.
    value = 'x' * value_size
    logging.debug('start inserts')
    do_it(keys, lambda key: do_insert(tree, key, value), 
          lambda: forest.commit())
    logging.debug('# nodes: %d' % len(ns.list_nodes()))
    logging.debug('nodes: %s' % sorted(ns.list_nodes()))
    print '# nodes after inserts:', len(ns.list_nodes())
        
    # Remove all but one key.
    logging.debug('start removes')
    do_it(keys[1:], lambda key: do_remove(tree, key), lambda: forest.commit())
    logging.debug('# nodes: %d' % len(ns.list_nodes()))
    logging.debug('nodes: %s' % sorted(ns.list_nodes()))
    print '# nodes after removes:', len(ns.list_nodes())
    assert len(ns.list_nodes()) == 2


if __name__ == '__main__':
    main()