#!/usr/bin/env python
#
# Copyright 2014  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import json
import os
import platform
import shutil
import stat
import sys
import tempfile
import time

import cliapp
import Crypto.Cipher.ARC4
import larch
import ttystatus


class BinaryJunkGenerator(object):

    """Generate a stream of binary junk data.

    The stream is produced by repeatedly ARC4-encrypting a fixed
    plaintext with a fixed key, both of which are class constants.
    Presumably this is meant to give data that does not compress or
    de-duplicate trivially -- confirm before relying on that.

    """

    key = b'obnam-benchmark'
    data = b'fake live data' * 1024

    def __init__(self):
        self.cipher = Crypto.Cipher.ARC4.new(self.key)
        # Encrypted bytes that have been generated but not yet handed out.
        self.buffer = b''

    def get(self, num_bytes):
        """Return the next ``num_bytes`` bytes of the junk stream.

        Returns an empty byte string if ``num_bytes`` is zero or
        negative.

        """

        chunks = []
        remaining = num_bytes
        while remaining > 0:
            if not self.buffer:
                # Refill: the cipher object keeps its state between
                # calls, so each refill continues the same key stream.
                self.buffer = self.cipher.encrypt(self.data)
            part = self.buffer[:remaining]
            chunks.append(part)
            remaining -= len(part)
            self.buffer = self.buffer[len(part):]
        return b''.join(chunks)


class StepInfo(object):

    """Collect information about, and time, one benchmark step.

    Use as a context manager. The wall-clock duration of the ``with``
    body is stored in ``info['duration']``, unless the body raised an
    exception. The body may call ``stop_timer`` to end the measured
    period early, so that later bookkeeping is not counted.

    """

    def __init__(self, label):
        self.label = label
        self.info = {
            'step': label,
        }
        # Both are (re)set by __enter__; defined here so the attributes
        # always exist.
        self.start = None
        self.end = None

    def add_info(self, key, value):
        """Record an arbitrary key/value pair for this step."""
        self.info[key] = value

    def stop_timer(self):
        """Mark the end of the timed period as now."""
        self.end = time.time()

    def __enter__(self):
        self.start = time.time()
        self.end = None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            if self.end is None:
                # stop_timer was not called: time the whole body.
                self.end = time.time()
            self.info['duration'] = self.end - self.start
        # Never swallow exceptions raised in the body.
        return False


class ObnamBenchmark(object):

    """Base class for a single Obnam benchmark.

    A benchmark creates live data in a temporary directory and then
    runs a fixed sequence of obnam operations on it (see ``run``).
    Subclasses must override ``create_live_data``, and may override
    ``add_settings`` to register their own settings.

    """

    def __init__(self, settings, results_dir, srctree, junk_generator):
        self.settings = settings
        self.results_dir = results_dir
        self.srctree = srctree
        self.junk_generator = junk_generator

    @classmethod
    def add_settings(cls, settings):
        """Register benchmark specific settings; none by default."""
        pass

    @property
    def benchmark_name(self):
        """The class name with any trailing 'Benchmark' removed."""
        name = self.__class__.__name__
        suffix = 'Benchmark'
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        return name

    def result_filename(self, label, suffix):
        """Return the path in the results directory for a step's file."""
        return os.path.join(
            self.results_dir,
            '%s-%s%s' % (self.benchmark_name, label, suffix))

    def run(self):
        """Run every step of the benchmark.

        Returns a dict whose 'steps' value is the list of per-step
        info dicts, in execution order.

        """

        self.tempdir = tempfile.mkdtemp()
        self.live_data = self.create_live_data_dir()
        self.repo = self.create_repo()

        steps = [
            ('create-live-data', self.create_live_data),
            ('initial-backup', self.backup),
            ('no-op-backup', self.backup),
            ('obnam-verify', self.obnam_verify),
            ('obnam-mount', self.obnam_mount),
            ('cleanup', self._cleanup_if_enabled),
        ]

        step_infos = []
        for label, method in steps:
            # Parenthesised single-argument print works as both the
            # Python 2 statement and the Python 3 function.
            print('  %s' % label)
            with StepInfo(label) as step_info:
                method(step_info)
            step_infos.append(step_info)

        return {
            'steps': [step_info.info for step_info in step_infos],
        }

    def create_live_data_dir(self):
        """Create and return the live data directory in the tempdir."""
        live_data = os.path.join(self.tempdir, 'live-data')
        os.mkdir(live_data)
        return live_data

    def create_repo(self):
        """Create and return the repository directory in the tempdir."""
        repo = os.path.join(self.tempdir, 'repo')
        os.mkdir(repo)
        return repo

    def create_live_data(self, step_info):
        """Populate ``self.live_data``.

        Subclasses MUST override this.

        """
        raise NotImplementedError()

    def backup(self, step_info):
        """Run 'obnam backup' and record repository and live data sizes."""
        self.run_obnam(
            ['backup', '-r', self.repo, self.live_data],
            step_info.label)
        # Stop the clock now so the size computations below are not
        # counted in the step's duration.
        step_info.stop_timer()
        step_info.add_info('repo-size', self.sum_of_file_sizes(self.repo))
        step_info.add_info(
            'live-data-size', self.sum_of_file_sizes(self.live_data))

    def obnam_verify(self, step_info):
        """Run 'obnam verify' against the repository."""
        self.run_obnam(
            ['verify', '-r', self.repo],
            step_info.label)

    def obnam_mount(self, step_info):
        """Run 'obnam mount' and read everything via the mount point.

        The mounted tree is read by archiving it with tar to
        /dev/null. The mount point is always unmounted afterwards with
        'fusermount -u', even if reading failed; an error from
        fusermount itself is printed and otherwise ignored.

        """

        mount = os.path.join(self.tempdir, 'mount')
        os.mkdir(mount)
        self.run_obnam(
            ['mount', '-r', self.repo, '--to', mount],
            step_info.label)
        try:
            cliapp.runcmd(['tar', '-cf', '/dev/null', mount + '/.'])
        finally:
            # Unmount even when tar fails, so that a stale mount does
            # not linger and block removal of the temporary directory.
            try:
                cliapp.runcmd(['fusermount', '-u', mount])
            except cliapp.AppException as e:
                print('ERROR from fusermount: %s' % str(e))

    def cleanup(self, step_info):
        """Remove the benchmark's temporary directory and its contents."""
        shutil.rmtree(self.tempdir)

    def _cleanup_if_enabled(self, step_info):
        """Run ``cleanup`` only if the 'cleanup' setting is true."""
        if self.settings['cleanup']:
            self.cleanup(step_info)

    def run_obnam(self, args, label):
        """Run obnam with ``args``, from the prepared source tree.

        A debug level log is written to the step's '.log' result file.
        The OBNAM_PROFILE environment variable is set to the step's
        '.prof' result file (presumably obnam writes profiling data
        there -- verify against obnam itself).

        """

        base_command = [
            self.settings['obnam-cmd'],
            '--no-default-config',
            '--log', self.result_filename(label, '.log'),
            '--log-level', 'debug',
        ]
        env = dict(os.environ)
        env['OBNAM_PROFILE'] = self.result_filename(label, '.prof')
        cliapp.runcmd(base_command + args, env=env, cwd=self.srctree)

    def sum_of_file_sizes(self, root_dir):
        """Return total size in bytes of regular files below ``root_dir``.

        Uses lstat, so symbolic links are not followed and do not count.

        """

        total = 0
        for dirname, subdirs, basenames in os.walk(root_dir):
            for basename in basenames:
                pathname = os.path.join(dirname, basename)
                st = os.lstat(pathname)
                if stat.S_ISREG(st.st_mode):
                    total += st.st_size
        return total


class EmptyFilesBenchmark(ObnamBenchmark):

    """Benchmark backing up a large number of empty files."""

    # Files are spread over subdirectories of at most this many files.
    files_per_dir = 1000

    @classmethod
    def add_settings(cls, settings):
        """Register the 'empty-files-count' setting."""
        # In a classmethod the first argument IS the class, so its name
        # is cls.__name__; cls.__class__.__name__ would be 'type'.
        settings.integer(
            ['empty-files-count'],
            'number of empty files for %s' % cls.__name__,
            default=10**6)

    @property
    def num_files(self):
        """Number of empty files to create, from the settings."""
        return self.settings['empty-files-count']

    def create_live_data(self, step_info):
        """Create the empty files, ``files_per_dir`` per subdirectory."""
        step_info.add_info('empty-files-count', self.num_files)
        for i in range(self.num_files):
            # Floor division keeps the directory number an integer on
            # every Python version.
            subdir = os.path.join(
                self.live_data,
                'dir-%d' % (i // self.files_per_dir))
            if (i % self.files_per_dir) == 0:
                # First file of a new subdirectory.
                os.mkdir(subdir)
            filename = os.path.join(subdir, 'file-%d' % i)
            with open(filename, 'w'):
                pass


class SingleLargeFileBenchmark(ObnamBenchmark):

    """Benchmark backing up one large file filled with junk data."""

    @classmethod
    def add_settings(cls, settings):
        """Register the 'single-large-file-size' setting."""
        # See EmptyFilesBenchmark.add_settings for why cls.__name__.
        settings.bytesize(
            ['single-large-file-size'],
            'size of file to create for %s' % cls.__name__,
            default='1TB')

    @property
    def file_size(self):
        """Size in bytes of the file to create, from the settings."""
        return self.settings['single-large-file-size']

    def create_live_data(self, step_info):
        """Write 'file.dat' with ``file_size`` bytes from the generator.

        Progress is reported on the terminal via ttystatus.

        """

        step_info.add_info('single-large-file-size', self.file_size)
        filename = os.path.join(self.live_data, 'file.dat')
        # The junk is binary data, so write it in binary mode.
        with open(filename, 'wb') as f:
            n = 0
            max_chunk_size = 2**10

            ts = ttystatus.TerminalStatus()
            ts['written'] = 0
            ts['total'] = self.file_size
            ts.format(
                '%ElapsedTime() '
                'writing live data: %ByteSize(written) of %ByteSize(total) '
                'at %ByteSpeed(written) '
                '(%PercentDone(written,total))')

            while n < self.file_size:
                num_bytes = min(max_chunk_size, self.file_size - n)
                data = self.junk_generator.get(num_bytes)
                f.write(data)
                n += len(data)
                ts['written'] = n

            ts.clear()
            ts.finish()


class ObnamBenchmarkRunner(cliapp.Application):

    """Command line application that runs all the Obnam benchmarks.

    Obnam is extracted from git at a chosen treeish into a temporary
    directory and built there. Each benchmark class is then run against
    that build, and the results are saved as 'benchmark.json' in the
    results directory.

    """

    benchmark_classes = [
        EmptyFilesBenchmark,
        SingleLargeFileBenchmark,
    ]

    def add_settings(self):
        """Register the runner's settings and those of every benchmark."""

        self.settings.string(
            ['obnam-cmd'],
            'use CMD as the argv[0] to invoke obnam',
            metavar='CMD',
            default='./obnam')

        self.settings.string(
            ['obnam-treeish'],
            'run Obnam from TREEISH in its git repository',
            metavar='TREEISH',
            default='HEAD')

        self.settings.string(
            ['results-dir'],
            'put results in DIR',
            metavar='DIR',
            default='.')

        self.settings.boolean(
            ['cleanup'],
            'clean up after each benchmark?',
            default=True)

        for benchmark_class in self.benchmark_classes:
            benchmark_class.add_settings(self.settings)

    def process_args(self, args):
        """Run every benchmark and save the combined results."""

        results_dir = self.create_results_dir()
        self.store_settings_in_results(results_dir)

        result_obj = {
            'system-info': self.get_system_info_dict(),
            'versions': self.get_version_info_dict(),
        }

        srctree = self.prepare_source_tree()
        try:
            junk_generator = BinaryJunkGenerator()

            benchmark_infos = {}
            for benchmark_class in self.benchmark_classes:
                print('Benchmark %s' % benchmark_class.__name__)
                benchmark = benchmark_class(
                    self.settings, results_dir, srctree, junk_generator)
                benchmark_info = benchmark.run()
                benchmark_infos[benchmark.benchmark_name] = benchmark_info
            result_obj['benchmarks'] = benchmark_infos

            self.save_result_obj(results_dir, result_obj)
        finally:
            # Remove the temporary build tree even if a benchmark fails.
            shutil.rmtree(srctree)

    def create_results_dir(self):
        """Return the absolute results directory, creating it if needed."""
        results = os.path.abspath(self.settings['results-dir'])
        if not os.path.exists(results):
            # makedirs, so that missing parent directories are created
            # as well.
            os.makedirs(results)
        return results

    def store_settings_in_results(self, results):
        """Save the current settings as 'obnam-benchmark.conf'."""
        cp = self.settings.as_cp()
        filename = os.path.join(results, 'obnam-benchmark.conf')
        with open(filename, 'w') as f:
            cp.write(f)

    def get_system_info_dict(self):
        """Return a dict describing the host, from the platform module."""
        return {
            'hostname': platform.node(),
            'machine': platform.machine(),
            'architecture': platform.architecture(),
            'uname': platform.uname(),
        }

    def get_version_info_dict(self):
        """Return a dict with the obnam treeish/version and larch version.

        The obnam version is the output of 'git describe TREEISH', run
        in the current working directory.

        """

        treeish = self.settings['obnam-treeish']
        describe = cliapp.runcmd(['git', 'describe', treeish]).strip()
        return {
            'obnam-treeish': treeish,
            'obnam-version': describe,
            'larch-version': larch.__version__,
        }

    def prepare_source_tree(self):
        """Extract and build obnam in a new temporary directory.

        Returns the directory name. The caller removes the directory.

        """

        srctree = tempfile.mkdtemp()
        self.extract_sources_from_git(srctree)
        self.build_obnam(srctree)
        return srctree

    def extract_sources_from_git(self, srctree):
        """Unpack the chosen treeish from git into ``srctree``."""
        # Two argv lists are passed to runcmd so that the output of
        # 'git archive' is fed to tar as a pipeline.
        cliapp.runcmd(
            ['git', 'archive', self.settings['obnam-treeish']],
            ['tar', '-C', srctree, '-xf', '-'])

    def build_obnam(self, srctree):
        """Build obnam's extension modules in place in ``srctree``."""
        cliapp.runcmd(
            ['python', 'setup.py', 'build_ext', '-i'],
            cwd=srctree)

    def save_result_obj(self, results_dir, result_obj):
        """Write ``result_obj`` as JSON to 'benchmark.json'."""
        filename = os.path.join(results_dir, 'benchmark.json')
        with open(filename, 'w') as f:
            json.dump(result_obj, f, indent=4)


if __name__ == '__main__':
    ObnamBenchmarkRunner().run()