summaryrefslogtreecommitdiff
path: root/obnam-benchmark
diff options
context:
space:
mode:
Diffstat (limited to 'obnam-benchmark')
-rwxr-xr-xobnam-benchmark419
1 files changed, 0 insertions, 419 deletions
diff --git a/obnam-benchmark b/obnam-benchmark
deleted file mode 100755
index 9a700867..00000000
--- a/obnam-benchmark
+++ /dev/null
@@ -1,419 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2014-2015 Lars Wirzenius
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-import json
-import logging
-import os
-import platform
-import re
-import shutil
-import stat
-import tempfile
-import time
-
-import cliapp
-import Crypto.Cipher.ARC4
-import larch
-import ttystatus
-
-
-class BinaryJunkGenerator(object):
-
- key = b'obnam-benchmark'
- data = b'fake live data' * 1024
-
- def __init__(self):
- self.cipher = Crypto.Cipher.ARC4.new(self.key)
- self.buffer = ''
-
- def get(self, num_bytes):
- n = 0
- result = []
- while n < num_bytes:
- if not self.buffer:
- self.buffer = self.cipher.encrypt(self.data)
-
- part = self.buffer[:num_bytes - n]
- result.append(part)
- n += len(part)
- self.buffer = self.buffer[len(part):]
-
- return ''.join(result)
-
-
-class StepInfo(object):
-
- def __init__(self, label):
- self.label = label
- self.info = {
- 'step': label,
- }
-
- def add_info(self, key, value):
- self.info[key] = value
-
- def stop_timer(self):
- self.end = time.time()
-
- def __enter__(self):
- self.start = time.time()
- self.end = None
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- if exc_type is None:
- if self.end is None:
- self.end = time.time()
- self.info['duration'] = self.end - self.start
- return False
-
-
-class ObnamBenchmark(object):
-
- def __init__(self, settings, results_dir, srctree, junk_generator, output):
- self.settings = settings
- self.results_dir = results_dir
- self.srctree = srctree
- self.junk_generator = junk_generator
- self.output = output
-
- @classmethod
- def add_settings(self, settings):
- pass
-
- @property
- def benchmark_name(self):
- s = self.__class__.__name__
- if s.endswith('Benchmark'):
- s = s[:-len('Benchmark')]
- return s
-
- def result_filename(self, label, suffix):
- return os.path.join(
- self.results_dir,
- '%s-%s%s' % (self.benchmark_name, label, suffix))
-
- def run(self):
- self.tempdir = tempfile.mkdtemp()
- self.live_data = self.create_live_data_dir()
- self.repo = self.create_repo()
- step_infos = []
-
- steps = [
- ('create-live-data', self.create_live_data),
- ('initial-backup', self.backup),
- ('no-op-backup', self.backup),
- ('obnam-verify', self.obnam_verify),
- ('obnam-mount', self.obnam_mount),
- ('cleanup',
- lambda si:
- self.cleanup(si) if self.settings['cleanup'] else None),
- ]
-
- for label, method in steps:
- self.output.write(' %s\n' % label)
- self.output.flush()
- with StepInfo(label) as step_info:
- method(step_info)
- step_infos.append(step_info)
-
- return {
- 'steps': [step_info.info for step_info in step_infos],
- }
-
- def create_live_data_dir(self):
- live_data = os.path.join(self.tempdir, 'live-data')
- os.mkdir(live_data)
- return live_data
-
- def create_repo(self):
- repo = os.path.join(self.tempdir, 'repo')
- os.mkdir(repo)
- return repo
-
- def create_live_data(self, step_info):
- # Subclasses MUST override this.
- raise NotImplementedError()
-
- def backup(self, step_info):
- log_file = self.log_file_name(step_info.label)
- self.run_obnam(
- ['backup', '-r', self.repo, self.live_data],
- step_info.label, log_file)
- step_info.stop_timer()
- step_info.add_info('repo-size', self.sum_of_file_sizes(self.repo))
- step_info.add_info(
- 'live-data-size', self.sum_of_file_sizes(self.live_data))
-
- step_info.add_info(
- 'downloaded-bytes', self.bytes_downloaded_from_repo(log_file))
- step_info.add_info(
- 'uploaded-bytes', self.bytes_uploaded_to_repo(log_file))
-
- def log_file_name(self, label):
- return self.result_filename(label, '.log')
-
- def obnam_verify(self, step_info):
- log_file = self.log_file_name(step_info.label)
- self.run_obnam(
- ['verify', '-r', self.repo],
- step_info.label, log_file)
-
- def obnam_mount(self, step_info):
- mount = os.path.join(self.tempdir, 'mount')
- os.mkdir(mount)
-
- log_file = self.log_file_name(step_info.label)
- self.run_obnam(
- ['mount', '-r', self.repo, '--to', mount],
- step_info.label, log_file)
-
- cliapp.runcmd(['tar', '-cf', '/dev/null', mount + '/.'])
-
- try:
- cliapp.runcmd(['fusermount', '-u', mount])
- except cliapp.AppException as e:
- sys.stderr.write('ERROR from fusermount: %s\n' % str(e))
-
- def cleanup(self, step_info):
- shutil.rmtree(self.tempdir)
-
- def run_obnam(self, args, label, log_file):
- base_command = [
- self.settings['obnam-cmd'],
- '--no-default-config',
- '--checkpoint=10000T',
- '--log', log_file,
- '--log-level', 'debug',
- ]
- env = dict(os.environ)
- env['OBNAM_PROFILE'] = self.result_filename(label, '.prof')
- cliapp.runcmd(base_command + args, env=env, cwd=self.srctree)
-
- def sum_of_file_sizes(self, root_dir):
- total = 0
- for dirname, subdirs, basenames in os.walk(root_dir):
- for basename in basenames:
- pathname = os.path.join(dirname, basename)
- st = os.lstat(pathname)
- if stat.S_ISREG(st.st_mode):
- total += st.st_size
- return total
-
- def bytes_downloaded_from_repo(self, log_file):
- pattern = r'INFO \* total downloaded data.* (?P<bytes>\d+) bytes'
- return self.get_transferred_bytes_from_log_file(log_file, pattern)
-
- def bytes_uploaded_to_repo(self, log_file):
- pattern = r'INFO \* total uploaded data.* (?P<bytes>\d+) bytes'
- return self.get_transferred_bytes_from_log_file(log_file, pattern)
-
- def get_transferred_bytes_from_log_file(self, log_file, pattern):
- data = self.tail(log_file)
- m = re.search(pattern, data)
- if m:
- return int(m.group('bytes'))
- return 0
-
- def tail(self, filename):
- kibibyte = 1024
- tail_length = 10 * kibibyte
- with open(filename, 'rb') as f:
- f.seek(0, os.SEEK_END)
- length = f.tell()
- f.seek(max(0, length - tail_length), os.SEEK_SET)
- return f.read()
-
-
-class EmptyFilesBenchmark(ObnamBenchmark):
-
- files_per_dir = 1000
-
- @classmethod
- def add_settings(self, settings):
- settings.integer(
- ['empty-files-count'],
- 'number of empty files for %s' % self.__class__.__name__,
- default=10**6)
-
- @property
- def num_files(self):
- return self.settings['empty-files-count']
-
- def create_live_data(self, step_info):
- step_info.add_info('empty-files-count', self.num_files)
- for i in range(self.num_files):
- subdir = os.path.join(
- self.live_data, 'dir-%d' % (i / self.files_per_dir))
- if (i % self.files_per_dir) == 0:
- os.mkdir(subdir)
- filename = os.path.join(subdir, 'file-%d' % i)
- with open(filename, 'w'):
- pass
-
-
-class SingleLargeFileBenchmark(ObnamBenchmark):
-
- @classmethod
- def add_settings(self, settings):
- settings.bytesize(
- ['single-large-file-size'],
- 'size of file to create for %s' % self.__class__.__name__,
- default='1TB')
-
- @property
- def file_size(self):
- return self.settings['single-large-file-size']
-
- def create_live_data(self, step_info):
- step_info.add_info('single-large-file-size', self.file_size)
- filename = os.path.join(self.live_data, 'file.dat')
- with open(filename, 'w') as f:
- n = 0
- max_chunk_size = 2**10
- ts = ttystatus.TerminalStatus()
- ts['written'] = 0
- ts['total'] = self.file_size
- ts.format(
- '%ElapsedTime() '
- 'writing live data: %ByteSize(written) of %ByteSize(total) '
- 'at %ByteSpeed(written) '
- '(%PercentDone(written,total))')
- while n < self.file_size:
- num_bytes = min(max_chunk_size, self.file_size - n)
- data = self.junk_generator.get(num_bytes)
- f.write(data)
- n += len(data)
- ts['written'] = n
- ts.clear()
- ts.finish()
-
-
-class ObnamBenchmarkRunner(cliapp.Application):
-
- benchmark_classes = [
- EmptyFilesBenchmark,
- SingleLargeFileBenchmark,
- ]
-
- def add_settings(self):
- self.settings.string(
- ['obnam-cmd'],
- 'use CMD as the argv[0] to invoke obnam',
- metavar='CMD',
- default='./obnam')
-
- self.settings.string(
- ['obnam-treeish'],
- 'run Obnam from TREEISH in its git repository',
- metavar='TREEISH',
- default='HEAD')
-
- self.settings.string(
- ['results-dir'],
- 'put results in DIR',
- metavar='DIR',
- default='.')
-
- self.settings.boolean(
- ['cleanup'],
- 'clean up after each benchmark?',
- default=True)
-
- for benchmark_class in self.benchmark_classes:
- benchmark_class.add_settings(self.settings)
-
- def process_args(self, args):
- results_dir = self.create_results_dir()
- self.store_settings_in_results(results_dir)
- result_obj = {
- 'system-info': self.get_system_info_dict(),
- 'versions': self.get_version_info_dict(),
- }
-
- srctree = self.prepare_source_tree()
-
- junk_generator = BinaryJunkGenerator()
- benchmark_infos = {}
- for benchmark_class in self.benchmark_classes:
- self.output.write('Benchmark %s\n' % benchmark_class.__name__)
- self.output.flush()
- benchmark = benchmark_class(
- self.settings, results_dir, srctree, junk_generator,
- self.output)
- benchmark_info = benchmark.run()
- benchmark_infos[benchmark.benchmark_name] = benchmark_info
- result_obj['benchmarks'] = benchmark_infos
-
- self.save_result_obj(results_dir, result_obj)
-
- shutil.rmtree(srctree)
-
- def create_results_dir(self):
- results = os.path.abspath(self.settings['results-dir'])
- if not os.path.exists(results):
- os.mkdir(results)
- return results
-
- def store_settings_in_results(self, results):
- cp = self.settings.as_cp()
- filename = os.path.join(results, 'obnam-benchmark.conf')
- with open(filename, 'w') as f:
- cp.write(f)
-
- def get_system_info_dict(self):
- return {
- 'hostname': platform.node(),
- 'machine': platform.machine(),
- 'architecture': platform.architecture(),
- 'uname': platform.uname(),
- }
-
- def get_version_info_dict(self):
- treeish = self.settings['obnam-treeish']
- describe = cliapp.runcmd(['git', 'describe', treeish]).strip()
- return {
- 'obnam-treeish': treeish,
- 'obnam-version': describe,
- 'larch-version': larch.__version__,
- }
-
- def prepare_source_tree(self):
- srctree = tempfile.mkdtemp()
- self.extract_sources_from_git(srctree)
- self.build_obnam(srctree)
- return srctree
-
- def extract_sources_from_git(self, srctree):
- cliapp.runcmd(
- ['git', 'archive', self.settings['obnam-treeish']],
- ['tar', '-C', srctree, '-xf', '-'])
-
- def build_obnam(self, srctree):
- cliapp.runcmd(
- ['python', 'setup.py', 'build_ext', '-i'],
- cwd=srctree)
-
- def save_result_obj(self, results_dir, result_obj):
- filename = os.path.join(results_dir, 'benchmark.json')
- with open(filename, 'w') as f:
- json.dump(result_obj, f, indent=4)
-
-
-if __name__ == '__main__':
- ObnamBenchmarkRunner().run()