From adab59eb8def203a63323b07cb495295a00facb1 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:00:40 +0200 Subject: Format settings creation Also remove unnecessary %default stuff. --- genbackupdata | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/genbackupdata b/genbackupdata index aa52903..551e2e2 100755 --- a/genbackupdata +++ b/genbackupdata @@ -26,26 +26,32 @@ import genbackupdatalib class GenbackupdataApp(cliapp.Application): def add_settings(self): - self.settings.bytesize(['create', 'c'], - 'how much data to create (default: %default)') - self.settings.bytesize(['file-size'], - 'size of one file (default: %default)', - default=16*1024) - self.settings.bytesize(['chunk-size'], - 'generate data in chunks of this size ' - '(default: %default)', - default=16*1024) - self.settings.integer(['depth'], - 'depth of directory tree (default: %default)', - default=3) - self.settings.integer(['max-files'], - 'max files/dirs per dir (default: %default)', - default=128) - self.settings.integer(['seed'], - 'seed for random number generator ' - '(default: %default)', - default=0) - self.settings.boolean(['quiet'], 'do not report progress') + self.settings.bytesize( + ['create', 'c'], + 'how much data to create (default: %default)') + self.settings.bytesize( + ['file-size'], + 'size of one file', + default=16*1024) + self.settings.bytesize( + ['chunk-size'], + 'generate data in chunks of this size', + default=16*1024) + self.settings.integer( + ['depth'], + 'depth of directory tree', + default=3) + self.settings.integer( + ['max-files'], + 'max files/dirs per dir', + default=128) + self.settings.integer( + ['seed'], + 'seed for random number generator', + default=0) + self.settings.boolean( + ['quiet'], + 'do not report progress') def process_args(self, args): outputdir = args[0] -- cgit v1.2.1 From 5c463021c87d41c4f1d42c785842e2c70680dc30 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius 
Date: Fri, 27 Mar 2015 20:01:36 +0200 Subject: Fix breaking of long lines to be nicer --- genbackupdata | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/genbackupdata b/genbackupdata index 551e2e2..9f70c5d 100755 --- a/genbackupdata +++ b/genbackupdata @@ -57,9 +57,8 @@ class GenbackupdataApp(cliapp.Application): outputdir = args[0] bytes = self.settings['create'] self.gen = genbackupdatalib.DataGenerator(self.settings['seed']) - self.names = genbackupdatalib.NameGenerator(outputdir, - self.settings['depth'], - self.settings['max-files']) + self.names = genbackupdatalib.NameGenerator( + outputdir, self.settings['depth'], self.settings['max-files']) self.setup_ttystatus() self.status['total'] = bytes @@ -110,4 +109,3 @@ class GenbackupdataApp(cliapp.Application): if __name__ == '__main__': GenbackupdataApp().run() - -- cgit v1.2.1 From a55aa9249cc8e0126efa0c1f0df8cf9bd158f002 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:05:24 +0200 Subject: Refactor ttystatus formatting to use .format() --- genbackupdata | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/genbackupdata b/genbackupdata index 9f70c5d..7ed1ee8 100755 --- a/genbackupdata +++ b/genbackupdata @@ -96,15 +96,9 @@ class GenbackupdataApp(cliapp.Application): self.status.disable() self.status['written'] = 0 self.status['total'] = 0 - self.status.add(ttystatus.Literal('Generating: ')) - self.status.add(ttystatus.ByteSize('written')) - self.status.add(ttystatus.Literal(' of ')) - self.status.add(ttystatus.ByteSize('total')) - self.status.add(ttystatus.Literal(' ')) - self.status.add(ttystatus.PercentDone('written', 'total')) - self.status.add(ttystatus.Literal(' (')) - self.status.add(ttystatus.ByteSpeed('written')) - self.status.add(ttystatus.Literal(')')) + self.status.format( + 'Generating %ByteSize(written) of %ByteSize(total) ' + '%PercentDone(written,total) (%ByteSpeed(written))') if __name__ == '__main__': -- cgit v1.2.1 From 
3e8bb3c6a264b0f525a52bcb363e99cb512d750e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:06:18 +0200 Subject: Refactor "f=open()...f.close()" to use "with open() as f:" --- genbackupdata | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/genbackupdata b/genbackupdata index 7ed1ee8..168961b 100755 --- a/genbackupdata +++ b/genbackupdata @@ -77,13 +77,12 @@ class GenbackupdataApp(cliapp.Application): dirname = os.path.dirname(pathname) if not os.path.exists(dirname): os.makedirs(dirname) - f = open(pathname, 'wb') - while bytes >= chunk_size: - self.write_bytes(f, chunk_size) - bytes -= chunk_size - if bytes > 0: - self.write_bytes(f, bytes) - f.close() + with open(pathname, 'wb') as f: + while bytes >= chunk_size: + self.write_bytes(f, chunk_size) + bytes -= chunk_size + if bytes > 0: + self.write_bytes(f, bytes) def write_bytes(self, f, bytes): chunk = self.gen.generate(bytes) -- cgit v1.2.1 From 73bb7a23a4b3c488d494fc3c2b8d9b5e84020669 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:14:44 +0200 Subject: Use RC4 for generating junk Suggested-by: Rob Kendrick --- genbackupdatalib/generator.py | 44 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/genbackupdatalib/generator.py b/genbackupdatalib/generator.py index 8cf349c..227b6a4 100644 --- a/genbackupdatalib/generator.py +++ b/genbackupdatalib/generator.py @@ -14,49 +14,31 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -import random import struct +import Crypto.Cipher.ARC4 + class DataGenerator(object): '''Generate random binary data.''' - # We generate data by using a blob of suitable size. The output - # sequence repeats the blob, where each repetition is preceded by - # a 64-bit counter. - # - # We need to be relatively prime with obnam's chunk size, which - # defaults to 64 KiB (65536 bytes). 
This is so that obnam does not - # notice a lot of duplicated data, resulting in unrealistically - # high amounts of compression in the backup store. - # - # Ideally, we would not generate any repeating data, but the random - # number generator is not fast enough for that. We need to generate - # data about as fast as the disk can write it, and the random number - # generator is orders of magnitude slower than that. - - _blob_size = 65521 - _blob_size = 1021 + _data = 'x' * 1024**2 def __init__(self, seed): - self._random = random.Random(seed) - self._blob = self._generate_blob() - self._counter = 0 + key = struct.pack('!Q', seed) + self._arc4 = Crypto.Cipher.ARC4.new(key) self._buffer = '' - def _generate_blob(self): - return ''.join(chr(self._random.randint(0, 255)) - for i in range(self._blob_size)) - def generate(self, size): - while size > len(self._buffer): - self._buffer += self._generate_more_data() + while len(self._buffer) < size: + self._buffer += self._generate_junk() + return self._split_off_data(size) + + def _generate_junk(self): + return self._arc4.encrypt(self._data) + + def _split_off_data(self, size): data = self._buffer[:size] self._buffer = self._buffer[size:] return data - - def _generate_more_data(self): - self._counter += 1 - return struct.pack('!Q', self._counter) + self._blob - -- cgit v1.2.1 From a3c9ca7069e56e1726a3095a465cd46f3b2ba3d8 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:23:54 +0200 Subject: Reduce unnecessary string catenations --- genbackupdatalib/generator.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/genbackupdatalib/generator.py b/genbackupdatalib/generator.py index 227b6a4..9e3dea2 100644 --- a/genbackupdatalib/generator.py +++ b/genbackupdatalib/generator.py @@ -28,17 +28,22 @@ class DataGenerator(object): def __init__(self, seed): key = struct.pack('!Q', seed) self._arc4 = Crypto.Cipher.ARC4.new(key) - self._buffer = '' + self._buffer = [] + 
self._buffer_length = 0 def generate(self, size): - while len(self._buffer) < size: - self._buffer += self._generate_junk() + while self._buffer_length < size: + self._generate_junk() return self._split_off_data(size) def _generate_junk(self): - return self._arc4.encrypt(self._data) + junk = self._arc4.encrypt(self._data) + self._buffer.append(junk) + self._buffer_length += len(junk) def _split_off_data(self, size): - data = self._buffer[:size] - self._buffer = self._buffer[size:] + self._buffer = [''.join(self._buffer)] + data = self._buffer[0][:size] + self._buffer[0] = self._buffer[0][size:] + self._buffer_length -= len(data) return data -- cgit v1.2.1 From cc5f5029ba8ba32034f47b70ff3d1cfb3f56581d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 27 Mar 2015 20:24:43 +0200 Subject: Update NEWS --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 882cb8a..e3569ae 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,12 @@ NEWS for genbackupdata ====================== +Version 1.8, released UNRELEASED +-------------------------------- + +* Change how binary data gets generated. It is now much less repetitive. + Suggested by Rob Kendrick. + Version 1.7, released 2012-09-29 -------------------------------- -- cgit v1.2.1