#!/usr/bin/env python
#
# Copyright 2014 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import json
import logging
import os
import platform
import re
import shutil
import stat
import sys
import tempfile
import time

import cliapp
import Crypto.Cipher.ARC4
import larch
import ttystatus
class BinaryJunkGenerator(object):

    """Produce binary junk data for use as fake live data.

    The junk is made by encrypting a fixed block of data (``data``)
    with ARC4 under a fixed key (``key``).  A single cipher object is
    kept for the lifetime of the generator, and each time the internal
    buffer runs dry the same plaintext block is encrypted again to
    refill it.

    All data handled here is bytes: the buffer starts out as ``b''``
    and results are joined with ``b''.join`` so that the class behaves
    the same on Python 2 (where ``b''`` is ``''``) and Python 3.

    """

    key = b'obnam-benchmark'
    data = b'fake live data' * 1024

    def __init__(self):
        self.cipher = Crypto.Cipher.ARC4.new(self.key)
        # Generated bytes that have not yet been handed out by get().
        self.buffer = b''

    def get(self, num_bytes):
        """Return the next ``num_bytes`` bytes of junk as a byte string.

        A request for zero (or fewer) bytes returns an empty byte
        string without touching the cipher.

        """

        chunks = []
        remaining = num_bytes
        while remaining > 0:
            if not self.buffer:
                # Refill: encrypt the fixed plaintext once more.
                self.buffer = self.cipher.encrypt(self.data)
            part = self.buffer[:remaining]
            chunks.append(part)
            remaining -= len(part)
            # Keep whatever was not consumed for the next call.
            self.buffer = self.buffer[len(part):]
        return b''.join(chunks)
class StepInfo(object):

    """Collect information about one benchmark step and time it.

    Use as a context manager.  Entering records the start time; a
    clean exit stores the elapsed seconds in ``info['duration']``.
    If the body raises, no duration is stored and the exception is
    allowed to propagate.

    """

    def __init__(self, label):
        self.label = label
        self.info = dict(step=label)

    def add_info(self, key, value):
        """Record an arbitrary key/value pair about this step."""
        self.info[key] = value

    def stop_timer(self):
        """Mark the end of the timed part of the step right now.

        Work done after this call, but still inside the ``with``
        block, is not included in the duration.

        """
        self.end = time.time()

    def __enter__(self):
        self.start = time.time()
        # None means "timer not stopped explicitly yet".
        self.end = None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            # The step failed: record nothing, do not swallow the error.
            return False
        if self.end is None:
            self.stop_timer()
        self.info['duration'] = self.end - self.start
        return False
class ObnamBenchmark(object):

    """Base class for one Obnam benchmark.

    A benchmark creates live data in a temporary directory and then
    runs a fixed sequence of steps against it (see ``run``).
    Subclasses must override ``create_live_data`` and may override
    ``add_settings`` to register their own settings.

    """

    def __init__(self, settings, results_dir, srctree, junk_generator, output):
        self.settings = settings
        self.results_dir = results_dir
        self.srctree = srctree
        self.junk_generator = junk_generator
        self.output = output

    @classmethod
    def add_settings(cls, settings):
        """Register benchmark-specific settings; the base class has none."""
        pass

    @property
    def benchmark_name(self):
        """The class name, minus a trailing 'Benchmark' if there is one."""
        s = self.__class__.__name__
        if s.endswith('Benchmark'):
            s = s[:-len('Benchmark')]
        return s

    def result_filename(self, label, suffix):
        """Return the path in the results directory for a step's file."""
        return os.path.join(
            self.results_dir,
            '%s-%s%s' % (self.benchmark_name, label, suffix))

    def run(self):
        """Run all steps in order and return their collected info.

        The return value is a dict with a single key, 'steps', whose
        value is the list of per-step info dicts.  An exception in any
        step propagates to the caller; later steps (including cleanup)
        are then not run.

        """

        self.tempdir = tempfile.mkdtemp()
        self.live_data = self.create_live_data_dir()
        self.repo = self.create_repo()

        steps = [
            ('create-live-data', self.create_live_data),
            ('initial-backup', self.backup),
            ('no-op-backup', self.backup),
            ('obnam-verify', self.obnam_verify),
            ('obnam-mount', self.obnam_mount),
            ('cleanup', self._cleanup_if_enabled),
        ]

        step_infos = []
        for label, method in steps:
            self.output.write(' %s\n' % label)
            self.output.flush()
            with StepInfo(label) as step_info:
                method(step_info)
            step_infos.append(step_info)

        return {
            'steps': [step_info.info for step_info in step_infos],
        }

    def _cleanup_if_enabled(self, step_info):
        # Run the cleanup step only when the 'cleanup' setting is true.
        if self.settings['cleanup']:
            self.cleanup(step_info)

    def create_live_data_dir(self):
        """Create and return the 'live-data' directory in the tempdir."""
        live_data = os.path.join(self.tempdir, 'live-data')
        os.mkdir(live_data)
        return live_data

    def create_repo(self):
        """Create and return the 'repo' directory in the tempdir."""
        repo = os.path.join(self.tempdir, 'repo')
        os.mkdir(repo)
        return repo

    def create_live_data(self, step_info):
        """Populate the live data directory."""
        # Subclasses MUST override this.
        raise NotImplementedError()

    def backup(self, step_info):
        """Back up the live data to the repository and record sizes."""
        log_file = self.log_file_name(step_info.label)
        self.run_obnam(
            ['backup', '-r', self.repo, self.live_data],
            step_info.label, log_file)
        # Stop the clock here so the measurements below are not
        # counted in the step's duration.
        step_info.stop_timer()
        step_info.add_info('repo-size', self.sum_of_file_sizes(self.repo))
        step_info.add_info(
            'live-data-size', self.sum_of_file_sizes(self.live_data))
        step_info.add_info(
            'downloaded-bytes', self.bytes_downloaded_from_repo(log_file))
        step_info.add_info(
            'uploaded-bytes', self.bytes_uploaded_to_repo(log_file))

    def log_file_name(self, label):
        """Return the path of the Obnam log file for a step."""
        return self.result_filename(label, '.log')

    def obnam_verify(self, step_info):
        """Run 'obnam verify' against the repository."""
        log_file = self.log_file_name(step_info.label)
        self.run_obnam(
            ['verify', '-r', self.repo],
            step_info.label, log_file)

    def obnam_mount(self, step_info):
        """Mount the repository, read everything via tar, then unmount."""
        mount = os.path.join(self.tempdir, 'mount')
        os.mkdir(mount)
        log_file = self.log_file_name(step_info.label)
        self.run_obnam(
            ['mount', '-r', self.repo, '--to', mount],
            step_info.label, log_file)
        try:
            cliapp.runcmd(['tar', '-cf', '/dev/null', mount + '/.'])
        finally:
            # Always try to unmount, even if tar failed, so a mounted
            # filesystem is not left behind.  A failing fusermount is
            # reported but deliberately not treated as fatal.
            try:
                cliapp.runcmd(['fusermount', '-u', mount])
            except cliapp.AppException as e:
                sys.stderr.write('ERROR from fusermount: %s\n' % str(e))

    def cleanup(self, step_info):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.tempdir)

    def run_obnam(self, args, label, log_file):
        """Run Obnam from the source tree with the given arguments.

        Obnam is invoked without the default configuration, logging
        at debug level to ``log_file``.  OBNAM_PROFILE is set in the
        child's environment to this step's '.prof' result file.

        """

        base_command = [
            self.settings['obnam-cmd'],
            '--no-default-config',
            '--checkpoint=10000T',
            '--log', log_file,
            '--log-level', 'debug',
        ]
        env = dict(os.environ)
        env['OBNAM_PROFILE'] = self.result_filename(label, '.prof')
        cliapp.runcmd(base_command + args, env=env, cwd=self.srctree)

    def sum_of_file_sizes(self, root_dir):
        """Return the total size of regular files under ``root_dir``.

        Files are examined with lstat, so symbolic links are not
        followed and do not count as regular files.

        """

        total = 0
        for dirname, subdirs, basenames in os.walk(root_dir):
            for basename in basenames:
                pathname = os.path.join(dirname, basename)
                st = os.lstat(pathname)
                if stat.S_ISREG(st.st_mode):
                    total += st.st_size
        return total

    def bytes_downloaded_from_repo(self, log_file):
        """Return the download total found in the log file, or 0."""
        pattern = r'INFO \* total downloaded data.* (?P<bytes>\d+) bytes'
        return self.get_transferred_bytes_from_log_file(log_file, pattern)

    def bytes_uploaded_to_repo(self, log_file):
        """Return the upload total found in the log file, or 0."""
        pattern = r'INFO \* total uploaded data.* (?P<bytes>\d+) bytes'
        return self.get_transferred_bytes_from_log_file(log_file, pattern)

    def get_transferred_bytes_from_log_file(self, log_file, pattern):
        """Search the tail of the log for ``pattern``.

        ``pattern`` must contain a named group 'bytes'.  Returns the
        matched number as an int, or 0 if the pattern is not found.

        """

        data = self.tail(log_file)
        if not isinstance(data, str):
            # tail() reads in binary mode; on Python 3 that yields
            # bytes, which cannot be searched with a text pattern.
            data = data.decode('utf-8', 'replace')
        m = re.search(pattern, data)
        if m:
            return int(m.group('bytes'))
        return 0

    def tail(self, filename):
        """Return up to the last 10 KiB of a file, read in binary mode."""
        kibibyte = 1024
        tail_length = 10 * kibibyte
        with open(filename, 'rb') as f:
            f.seek(0, os.SEEK_END)
            length = f.tell()
            # Files shorter than the tail length are read from the start.
            f.seek(max(0, length - tail_length), os.SEEK_SET)
            return f.read()
class EmptyFilesBenchmark(ObnamBenchmark):

    """Benchmark with live data consisting only of empty files.

    The files are spread over subdirectories of the live data
    directory, with at most ``files_per_dir`` files in each.

    """

    files_per_dir = 1000

    @classmethod
    def add_settings(cls, settings):
        """Register the 'empty-files-count' setting."""
        settings.integer(
            ['empty-files-count'],
            # Use the class's own name; going through __class__ on a
            # class object would name its metaclass instead.
            'number of empty files for %s' % cls.__name__,
            default=10**6)

    @property
    def num_files(self):
        """Number of empty files to create, from the settings."""
        return self.settings['empty-files-count']

    def create_live_data(self, step_info):
        """Create the empty files, named 'dir-N/file-M'.

        File numbers run from 0 to num_files - 1; file M goes into
        directory number M // files_per_dir.  A directory is only
        created if at least one file goes into it.

        """

        num_files = self.num_files
        step_info.add_info('empty-files-count', num_files)
        per_dir = self.files_per_dir
        for first in range(0, num_files, per_dir):
            # Floor division keeps the directory number an integer on
            # both Python 2 and Python 3.
            subdir = os.path.join(
                self.live_data, 'dir-%d' % (first // per_dir))
            os.mkdir(subdir)
            for i in range(first, min(first + per_dir, num_files)):
                filename = os.path.join(subdir, 'file-%d' % i)
                with open(filename, 'w'):
                    pass
class SingleLargeFileBenchmark(ObnamBenchmark):

    """Benchmark with live data consisting of one large file of junk."""

    @classmethod
    def add_settings(cls, settings):
        """Register the 'single-large-file-size' setting."""
        settings.bytesize(
            ['single-large-file-size'],
            # Use the class's own name; going through __class__ on a
            # class object would name its metaclass instead.
            'size of file to create for %s' % cls.__name__,
            default='1TB')

    @property
    def file_size(self):
        """Size in bytes of the file to create, from the settings."""
        return self.settings['single-large-file-size']

    def create_live_data(self, step_info):
        """Write 'file.dat' in the live data directory.

        The file is filled with output from the junk generator, in
        chunks of at most 1 KiB, while progress is shown on the
        terminal.

        """

        step_info.add_info('single-large-file-size', self.file_size)
        filename = os.path.join(self.live_data, 'file.dat')
        # The junk is binary data, so the file is opened in binary mode.
        with open(filename, 'wb') as f:
            n = 0
            max_chunk_size = 2**10

            ts = ttystatus.TerminalStatus()
            ts['written'] = 0
            ts['total'] = self.file_size
            ts.format(
                '%ElapsedTime() '
                'writing live data: %ByteSize(written) of %ByteSize(total) '
                'at %ByteSpeed(written) '
                '(%PercentDone(written,total))')

            while n < self.file_size:
                # The final chunk may be shorter than max_chunk_size.
                num_bytes = min(max_chunk_size, self.file_size - n)
                data = self.junk_generator.get(num_bytes)
                f.write(data)
                n += len(data)
                ts['written'] = n

            ts.clear()
            ts.finish()
class ObnamBenchmarkRunner(cliapp.Application):

    """Command line application that runs all the Obnam benchmarks.

    Obnam is extracted from git at the configured treeish into a
    temporary source tree, built there, and each class listed in
    ``benchmark_classes`` is run against that build.  Results are
    written to the results directory as 'benchmark.json', next to a
    copy of the settings in 'obnam-benchmark.conf'.

    """

    benchmark_classes = [
        EmptyFilesBenchmark,
        SingleLargeFileBenchmark,
    ]

    def add_settings(self):
        """Register the runner's settings and those of every benchmark."""
        self.settings.string(
            ['obnam-cmd'],
            'use CMD as the argv[0] to invoke obnam',
            metavar='CMD',
            default='./obnam')

        self.settings.string(
            ['obnam-treeish'],
            'run Obnam from TREEISH in its git repository',
            metavar='TREEISH',
            default='HEAD')

        self.settings.string(
            ['results-dir'],
            'put results in DIR',
            metavar='DIR',
            default='.')

        self.settings.boolean(
            ['cleanup'],
            'clean up after each benchmark?',
            default=True)

        for benchmark_class in self.benchmark_classes:
            benchmark_class.add_settings(self.settings)

    def process_args(self, args):
        """Run every benchmark and save the combined results.

        The temporary source tree is removed when this method is
        done, whether the benchmarks succeeded or raised.

        """

        results_dir = self.create_results_dir()
        self.store_settings_in_results(results_dir)

        result_obj = {
            'system-info': self.get_system_info_dict(),
            'versions': self.get_version_info_dict(),
        }

        srctree = self.prepare_source_tree()
        try:
            # One generator is shared by all benchmarks.
            junk_generator = BinaryJunkGenerator()

            benchmark_infos = {}
            for benchmark_class in self.benchmark_classes:
                self.output.write('Benchmark %s\n' % benchmark_class.__name__)
                self.output.flush()
                benchmark = benchmark_class(
                    self.settings, results_dir, srctree, junk_generator,
                    self.output)
                benchmark_info = benchmark.run()
                benchmark_infos[benchmark.benchmark_name] = benchmark_info
            result_obj['benchmarks'] = benchmark_infos

            self.save_result_obj(results_dir, result_obj)
        finally:
            # Do not leave the temporary source tree behind on failure.
            shutil.rmtree(srctree)

    def create_results_dir(self):
        """Return the absolute results directory, creating it if missing."""
        results = os.path.abspath(self.settings['results-dir'])
        if not os.path.exists(results):
            # makedirs also creates missing parent directories.
            os.makedirs(results)
        return results

    def store_settings_in_results(self, results):
        """Write the current settings to 'obnam-benchmark.conf'."""
        cp = self.settings.as_cp()
        filename = os.path.join(results, 'obnam-benchmark.conf')
        with open(filename, 'w') as f:
            cp.write(f)

    def get_system_info_dict(self):
        """Return a dict describing the host, from the platform module."""
        return {
            'hostname': platform.node(),
            'machine': platform.machine(),
            'architecture': platform.architecture(),
            'uname': platform.uname(),
        }

    def get_version_info_dict(self):
        """Return a dict with the Obnam and larch versions under test.

        The Obnam version is the output of 'git describe' for the
        configured treeish, run in the current working directory.

        """

        treeish = self.settings['obnam-treeish']
        describe = cliapp.runcmd(['git', 'describe', treeish]).strip()
        return {
            'obnam-treeish': treeish,
            'obnam-version': describe,
            'larch-version': larch.__version__,
        }

    def prepare_source_tree(self):
        """Extract and build Obnam in a new temporary directory.

        Returns the directory.  If extraction or the build fails, the
        directory is removed before the exception is re-raised.

        """

        srctree = tempfile.mkdtemp()
        try:
            self.extract_sources_from_git(srctree)
            self.build_obnam(srctree)
        except BaseException:
            shutil.rmtree(srctree)
            raise
        return srctree

    def extract_sources_from_git(self, srctree):
        """Unpack the configured treeish into ``srctree``."""
        # Two argv lists are passed to runcmd; presumably it runs them
        # as a pipeline, feeding 'git archive' output into tar.
        cliapp.runcmd(
            ['git', 'archive', self.settings['obnam-treeish']],
            ['tar', '-C', srctree, '-xf', '-'])

    def build_obnam(self, srctree):
        """Build extension modules in place inside ``srctree``."""
        cliapp.runcmd(
            ['python', 'setup.py', 'build_ext', '-i'],
            cwd=srctree)

    def save_result_obj(self, results_dir, result_obj):
        """Write ``result_obj`` as indented JSON to 'benchmark.json'."""
        filename = os.path.join(results_dir, 'benchmark.json')
        with open(filename, 'w') as f:
            json.dump(result_obj, f, indent=4)
if __name__ == '__main__':
    # Only start the application when executed as a script, not when
    # the module is imported.
    app = ObnamBenchmarkRunner()
    app.run()