#!/usr/bin/python # Copyright 2010 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # Excercise my B-tree implementation, for simple benchmarking purposes. # The benchmark gets a location and an operation count as command line # arguments. # # If the location is the empty string, an in-memory node store is used. # Otherwise it must be a non-existent directory name. # # The benchmark will do the given number of insertions into the tree, and # do_it the speed of that. Then it will look up each of those, and do_it # the lookups. import cProfile import logging import os import random import shutil import sys import time import larch def do_it(keys, func, final): start = time.clock() for key in keys: func(key) final() end = time.clock() return end - start def assert_refcounts_are_one(tree): def helper(node_id): refcount = tree.node_store.rs.get_refcount(node_id) node = tree._get_node(node_id) assert refcount == 1, 'type=%s id=%d refcount=%d' % (repr(node), node_id, refcount) if isinstance(node, larch.IndexNode): for child_id in node.values(): helper(child_id) helper(tree.root.id) def do_insert(tree, key, value): logging.debug('do_insert(%s)' % (repr(key))) if tree.root is not None: assert_refcounts_are_one(tree) tree.insert(key, value) assert_refcounts_are_one(tree) def do_remove(tree, key): logging.debug('do_remove(%s)' % (repr(key))) assert_refcounts_are_one(tree) tree.remove(key) assert_refcounts_are_one(tree) def main(): if True: import logging import tracing tracing.trace_add_pattern('tree') logging.basicConfig(filename='larch.log', level=logging.DEBUG) location = sys.argv[1] n = int(sys.argv[2]) key_size = 19 value_size = 128 node_size = 300 codec = larch.NodeCodec(key_size) if location == '': ns = larch.NodeStoreMemory(node_size, codec) else: if os.path.exists(location): raise Exception('%s exists already' % location) os.mkdir(location) ns = larch.NodeStoreDisk(True, node_size, codec, dirname=location) forest = larch.Forest(ns) tree = forest.new_tree() logging.debug('min keys: %d' % tree.min_index_length) logging.debug('max keys: %d' % tree.max_index_length) # Create list of keys. keys = ['%0*d' % (key_size, i) for i in xrange(n)] # Do inserts. value = 'x' * value_size logging.debug('start inserts') do_it(keys, lambda key: do_insert(tree, key, value), lambda: forest.commit()) logging.debug('# nodes: %d' % len(ns.list_nodes())) logging.debug('nodes: %s' % sorted(ns.list_nodes())) print '# nodes after inserts:', len(ns.list_nodes()) # Remove all but one key. logging.debug('start removes') do_it(keys[1:], lambda key: do_remove(tree, key), lambda: forest.commit()) logging.debug('# nodes: %d' % len(ns.list_nodes())) logging.debug('nodes: %s' % sorted(ns.list_nodes())) print '# nodes after removes:', len(ns.list_nodes()) assert len(ns.list_nodes()) == 2 if __name__ == '__main__': main()