From 4bdaecf153ae2b316904f1e5281fa8b873e305a4 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 10 Oct 2015 12:30:50 +0300 Subject: Drop now-obsolete benchmark programs --- obnam-benchmark | 419 ------------------------------------------------ obnam-benchmark-summary | 136 ---------------- obnam-benchmark.conf | 4 - 3 files changed, 559 deletions(-) delete mode 100755 obnam-benchmark delete mode 100755 obnam-benchmark-summary delete mode 100644 obnam-benchmark.conf diff --git a/obnam-benchmark b/obnam-benchmark deleted file mode 100755 index 9a700867..00000000 --- a/obnam-benchmark +++ /dev/null @@ -1,419 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2014-2015 Lars Wirzenius -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.
- - -import json -import logging -import os -import platform -import re -import shutil -import stat -import tempfile -import time - -import cliapp -import Crypto.Cipher.ARC4 -import larch -import ttystatus - - -class BinaryJunkGenerator(object): - - key = b'obnam-benchmark' - data = b'fake live data' * 1024 - - def __init__(self): - self.cipher = Crypto.Cipher.ARC4.new(self.key) - self.buffer = '' - - def get(self, num_bytes): - n = 0 - result = [] - while n < num_bytes: - if not self.buffer: - self.buffer = self.cipher.encrypt(self.data) - - part = self.buffer[:num_bytes - n] - result.append(part) - n += len(part) - self.buffer = self.buffer[len(part):] - - return ''.join(result) - - -class StepInfo(object): - - def __init__(self, label): - self.label = label - self.info = { - 'step': label, - } - - def add_info(self, key, value): - self.info[key] = value - - def stop_timer(self): - self.end = time.time() - - def __enter__(self): - self.start = time.time() - self.end = None - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is None: - if self.end is None: - self.end = time.time() - self.info['duration'] = self.end - self.start - return False - - -class ObnamBenchmark(object): - - def __init__(self, settings, results_dir, srctree, junk_generator, output): - self.settings = settings - self.results_dir = results_dir - self.srctree = srctree - self.junk_generator = junk_generator - self.output = output - - @classmethod - def add_settings(self, settings): - pass - - @property - def benchmark_name(self): - s = self.__class__.__name__ - if s.endswith('Benchmark'): - s = s[:-len('Benchmark')] - return s - - def result_filename(self, label, suffix): - return os.path.join( - self.results_dir, - '%s-%s%s' % (self.benchmark_name, label, suffix)) - - def run(self): - self.tempdir = tempfile.mkdtemp() - self.live_data = self.create_live_data_dir() - self.repo = self.create_repo() - step_infos = [] - - steps = [ - ('create-live-data', 
self.create_live_data), - ('initial-backup', self.backup), - ('no-op-backup', self.backup), - ('obnam-verify', self.obnam_verify), - ('obnam-mount', self.obnam_mount), - ('cleanup', - lambda si: - self.cleanup(si) if self.settings['cleanup'] else None), - ] - - for label, method in steps: - self.output.write(' %s\n' % label) - self.output.flush() - with StepInfo(label) as step_info: - method(step_info) - step_infos.append(step_info) - - return { - 'steps': [step_info.info for step_info in step_infos], - } - - def create_live_data_dir(self): - live_data = os.path.join(self.tempdir, 'live-data') - os.mkdir(live_data) - return live_data - - def create_repo(self): - repo = os.path.join(self.tempdir, 'repo') - os.mkdir(repo) - return repo - - def create_live_data(self, step_info): - # Subclasses MUST override this. - raise NotImplementedError() - - def backup(self, step_info): - log_file = self.log_file_name(step_info.label) - self.run_obnam( - ['backup', '-r', self.repo, self.live_data], - step_info.label, log_file) - step_info.stop_timer() - step_info.add_info('repo-size', self.sum_of_file_sizes(self.repo)) - step_info.add_info( - 'live-data-size', self.sum_of_file_sizes(self.live_data)) - - step_info.add_info( - 'downloaded-bytes', self.bytes_downloaded_from_repo(log_file)) - step_info.add_info( - 'uploaded-bytes', self.bytes_uploaded_to_repo(log_file)) - - def log_file_name(self, label): - return self.result_filename(label, '.log') - - def obnam_verify(self, step_info): - log_file = self.log_file_name(step_info.label) - self.run_obnam( - ['verify', '-r', self.repo], - step_info.label, log_file) - - def obnam_mount(self, step_info): - mount = os.path.join(self.tempdir, 'mount') - os.mkdir(mount) - - log_file = self.log_file_name(step_info.label) - self.run_obnam( - ['mount', '-r', self.repo, '--to', mount], - step_info.label, log_file) - - cliapp.runcmd(['tar', '-cf', '/dev/null', mount + '/.']) - - try: - cliapp.runcmd(['fusermount', '-u', mount]) - except 
cliapp.AppException as e: - sys.stderr.write('ERROR from fusermount: %s\n' % str(e)) - - def cleanup(self, step_info): - shutil.rmtree(self.tempdir) - - def run_obnam(self, args, label, log_file): - base_command = [ - self.settings['obnam-cmd'], - '--no-default-config', - '--checkpoint=10000T', - '--log', log_file, - '--log-level', 'debug', - ] - env = dict(os.environ) - env['OBNAM_PROFILE'] = self.result_filename(label, '.prof') - cliapp.runcmd(base_command + args, env=env, cwd=self.srctree) - - def sum_of_file_sizes(self, root_dir): - total = 0 - for dirname, subdirs, basenames in os.walk(root_dir): - for basename in basenames: - pathname = os.path.join(dirname, basename) - st = os.lstat(pathname) - if stat.S_ISREG(st.st_mode): - total += st.st_size - return total - - def bytes_downloaded_from_repo(self, log_file): - pattern = r'INFO \* total downloaded data.* (?P<bytes>\d+) bytes' - return self.get_transferred_bytes_from_log_file(log_file, pattern) - - def bytes_uploaded_to_repo(self, log_file): - pattern = r'INFO \* total uploaded data.* (?P<bytes>\d+) bytes' - return self.get_transferred_bytes_from_log_file(log_file, pattern) - - def get_transferred_bytes_from_log_file(self, log_file, pattern): - data = self.tail(log_file) - m = re.search(pattern, data) - if m: - return int(m.group('bytes')) - return 0 - - def tail(self, filename): - kibibyte = 1024 - tail_length = 10 * kibibyte - with open(filename, 'rb') as f: - f.seek(0, os.SEEK_END) - length = f.tell() - f.seek(max(0, length - tail_length), os.SEEK_SET) - return f.read() - - -class EmptyFilesBenchmark(ObnamBenchmark): - - files_per_dir = 1000 - - @classmethod - def add_settings(self, settings): - settings.integer( - ['empty-files-count'], - 'number of empty files for %s' % self.__class__.__name__, - default=10**6) - - @property - def num_files(self): - return self.settings['empty-files-count'] - - def create_live_data(self, step_info): - step_info.add_info('empty-files-count', self.num_files) - for i in 
range(self.num_files): - subdir = os.path.join( - self.live_data, 'dir-%d' % (i / self.files_per_dir)) - if (i % self.files_per_dir) == 0: - os.mkdir(subdir) - filename = os.path.join(subdir, 'file-%d' % i) - with open(filename, 'w'): - pass - - -class SingleLargeFileBenchmark(ObnamBenchmark): - - @classmethod - def add_settings(self, settings): - settings.bytesize( - ['single-large-file-size'], - 'size of file to create for %s' % self.__class__.__name__, - default='1TB') - - @property - def file_size(self): - return self.settings['single-large-file-size'] - - def create_live_data(self, step_info): - step_info.add_info('single-large-file-size', self.file_size) - filename = os.path.join(self.live_data, 'file.dat') - with open(filename, 'w') as f: - n = 0 - max_chunk_size = 2**10 - ts = ttystatus.TerminalStatus() - ts['written'] = 0 - ts['total'] = self.file_size - ts.format( - '%ElapsedTime() ' - 'writing live data: %ByteSize(written) of %ByteSize(total) ' - 'at %ByteSpeed(written) ' - '(%PercentDone(written,total))') - while n < self.file_size: - num_bytes = min(max_chunk_size, self.file_size - n) - data = self.junk_generator.get(num_bytes) - f.write(data) - n += len(data) - ts['written'] = n - ts.clear() - ts.finish() - - -class ObnamBenchmarkRunner(cliapp.Application): - - benchmark_classes = [ - EmptyFilesBenchmark, - SingleLargeFileBenchmark, - ] - - def add_settings(self): - self.settings.string( - ['obnam-cmd'], - 'use CMD as the argv[0] to invoke obnam', - metavar='CMD', - default='./obnam') - - self.settings.string( - ['obnam-treeish'], - 'run Obnam from TREEISH in its git repository', - metavar='TREEISH', - default='HEAD') - - self.settings.string( - ['results-dir'], - 'put results in DIR', - metavar='DIR', - default='.') - - self.settings.boolean( - ['cleanup'], - 'clean up after each benchmark?', - default=True) - - for benchmark_class in self.benchmark_classes: - benchmark_class.add_settings(self.settings) - - def process_args(self, args): - results_dir 
= self.create_results_dir() - self.store_settings_in_results(results_dir) - result_obj = { - 'system-info': self.get_system_info_dict(), - 'versions': self.get_version_info_dict(), - } - - srctree = self.prepare_source_tree() - - junk_generator = BinaryJunkGenerator() - benchmark_infos = {} - for benchmark_class in self.benchmark_classes: - self.output.write('Benchmark %s\n' % benchmark_class.__name__) - self.output.flush() - benchmark = benchmark_class( - self.settings, results_dir, srctree, junk_generator, - self.output) - benchmark_info = benchmark.run() - benchmark_infos[benchmark.benchmark_name] = benchmark_info - result_obj['benchmarks'] = benchmark_infos - - self.save_result_obj(results_dir, result_obj) - - shutil.rmtree(srctree) - - def create_results_dir(self): - results = os.path.abspath(self.settings['results-dir']) - if not os.path.exists(results): - os.mkdir(results) - return results - - def store_settings_in_results(self, results): - cp = self.settings.as_cp() - filename = os.path.join(results, 'obnam-benchmark.conf') - with open(filename, 'w') as f: - cp.write(f) - - def get_system_info_dict(self): - return { - 'hostname': platform.node(), - 'machine': platform.machine(), - 'architecture': platform.architecture(), - 'uname': platform.uname(), - } - - def get_version_info_dict(self): - treeish = self.settings['obnam-treeish'] - describe = cliapp.runcmd(['git', 'describe', treeish]).strip() - return { - 'obnam-treeish': treeish, - 'obnam-version': describe, - 'larch-version': larch.__version__, - } - - def prepare_source_tree(self): - srctree = tempfile.mkdtemp() - self.extract_sources_from_git(srctree) - self.build_obnam(srctree) - return srctree - - def extract_sources_from_git(self, srctree): - cliapp.runcmd( - ['git', 'archive', self.settings['obnam-treeish']], - ['tar', '-C', srctree, '-xf', '-']) - - def build_obnam(self, srctree): - cliapp.runcmd( - ['python', 'setup.py', 'build_ext', '-i'], - cwd=srctree) - - def save_result_obj(self, 
results_dir, result_obj): - filename = os.path.join(results_dir, 'benchmark.json') - with open(filename, 'w') as f: - json.dump(result_obj, f, indent=4) - - -if __name__ == '__main__': - ObnamBenchmarkRunner().run() diff --git a/obnam-benchmark-summary b/obnam-benchmark-summary deleted file mode 100755 index b3c408c3..00000000 --- a/obnam-benchmark-summary +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2014 Lars Wirzenius -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.
- - -import json -import os - -import cliapp - - -MiB = 2**20 -GiB = 2**30 - - -class ObnamBenchmarkSummary(cliapp.Application): - - columns = ( - ('version', 'version'), - ('ef-speed', 'EF files/s'), - ('ef-repo-size', 'EF repo (MiB)'), - ('lf-speed', 'LF MiB/s'), - ('lf-repo-size', 'LF repo (MiB)'), - ) - - def process_args(self, args): - summaries = [] - for dirname in args: - summary = self.summarise_directory(dirname) - summaries.append(summary) - self.show_summaries(summaries) - - def summarise_directory(self, dirname): - filename = os.path.join(dirname, 'benchmark.json') - with open(filename) as f: - obj = json.load(f) - - return { - 'version': - self.get_obnam_version(obj), - 'ef-speed': - '%.0f' % self.get_empty_files_speed(obj), - 'ef-files': - self.get_empty_files_count(obj), - 'ef-repo-size': - self.format_size(self.get_empty_files_repo_size(obj), MiB), - 'lf-speed': - self.format_size(self.get_large_file_speed(obj), MiB), - 'lf-size': - self.format_size(self.get_large_file_size(obj), GiB), - 'lf-repo-size': - self.format_size(self.get_large_file_repo_size(obj), MiB), - } - - def get_obnam_version(self, obj): - return obj['versions']['obnam-version'] - - def get_empty_files_speed(self, obj): - count = self.get_empty_files_count(obj) - step = self.find_step(obj, 'EmptyFiles', 'initial-backup') - return count / step['duration'] - - def get_empty_files_count(self, obj): - step = self.find_step(obj, 'EmptyFiles', 'create-live-data') - return step['empty-files-count'] - - def get_empty_files_repo_size(self, obj): - step = self.find_step(obj, 'EmptyFiles', 'initial-backup') - return step['repo-size'] - - def get_large_file_speed(self, obj): - file_size = self.get_large_file_size(obj) - step = self.find_step(obj, 'SingleLargeFile', 'initial-backup') - return file_size / step['duration'] - - def get_large_file_size(self, obj): - step = self.find_step(obj, 'SingleLargeFile', 'create-live-data') - return step['single-large-file-size'] - - def 
get_large_file_repo_size(self, obj): - step = self.find_step(obj, 'SingleLargeFile', 'initial-backup') - return step['repo-size'] - - def find_step(self, obj, benchmark_name, step_name): - for step in obj['benchmarks'][benchmark_name]['steps']: - if step['step'] == step_name: - return step - raise Exception('step %s not found' % step) - - def format_size(self, size, unit): - return '%.0f' % (size / unit) - - def show_summaries(self, summaries): - lines = [[title for key, title in self.columns]] - - for s in summaries: - line = [str(s[key]) for key, title in self.columns] - lines.append(line) - - widths = self.compute_column_widths(lines) - - titles = lines[0] - results = sorted(lines[1:]) - for line in [titles] + results: - cells = [] - for i, cell in enumerate(line): - cells.append('%*s' % (widths[i], cell)) - self.output.write(' | '.join(cells)) - self.output.write('\n') - - def compute_column_widths(self, lines): - widths = [] - n = len(lines[0]) - for col in range(n): - width = 0 - for line in lines: - width = max(width, len(line[col])) - widths.append(width) - return widths - - -if __name__ == '__main__': - ObnamBenchmarkSummary().run() diff --git a/obnam-benchmark.conf b/obnam-benchmark.conf deleted file mode 100644 index ad36ecc5..00000000 --- a/obnam-benchmark.conf +++ /dev/null @@ -1,4 +0,0 @@ -[config] -empty-files-count = 1000000 -single-large-file-size = 100G -log = t.results/foo.log -- cgit v1.2.1