diff options
Diffstat (limited to 'genbackupdatalib/generator.py')
-rw-r--r-- | genbackupdatalib/generator.py | 57 |
1 files changed, 22 insertions, 35 deletions
diff --git a/genbackupdatalib/generator.py b/genbackupdatalib/generator.py
index 8cf349c..9e3dea2 100644
--- a/genbackupdatalib/generator.py
+++ b/genbackupdatalib/generator.py
@@ -14,49 +14,36 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.


-import random
 import struct

+import Crypto.Cipher.ARC4
+

 class DataGenerator(object):

     '''Generate random binary data.'''

-    # We generate data by using a blob of suitable size. The output
-    # sequence repeats the blob, where each repetition is preceded by
-    # a 64-bit counter.
-    #
-    # We need to be relatively prime with obnam's chunk size, which
-    # defaults to 64 KiB (65536 bytes). This is so that obnam does not
-    # notice a lot of duplicated data, resulting in unrealistically
-    # high amounts of compression in the backup store.
-    #
-    # Ideally, we would not generate any repeating data, but the random
-    # number generator is not fast enough for that. We need to generate
-    # data about as fast as the disk can write it, and the random number
-    # generator is orders of magnitude slower than that.
-
-    _blob_size = 65521
-    _blob_size = 1021
+    _data = 'x' * 1024**2

     def __init__(self, seed):
-        self._random = random.Random(seed)
-        self._blob = self._generate_blob()
-        self._counter = 0
-        self._buffer = ''
-
-    def _generate_blob(self):
-        return ''.join(chr(self._random.randint(0, 255))
-                       for i in range(self._blob_size))
-
+        key = struct.pack('!Q', seed)
+        self._arc4 = Crypto.Cipher.ARC4.new(key)
+        self._buffer = []
+        self._buffer_length = 0
+
     def generate(self, size):
-        while size > len(self._buffer):
-            self._buffer += self._generate_more_data()
-        data = self._buffer[:size]
-        self._buffer = self._buffer[size:]
+        while self._buffer_length < size:
+            self._generate_junk()
+        return self._split_off_data(size)
+
+    def _generate_junk(self):
+        junk = self._arc4.encrypt(self._data)
+        self._buffer.append(junk)
+        self._buffer_length += len(junk)
+
+    def _split_off_data(self, size):
+        self._buffer = [''.join(self._buffer)]
+        data = self._buffer[0][:size]
+        self._buffer[0] = self._buffer[0][size:]
+        self._buffer_length -= len(data)
         return data
-
-    def _generate_more_data(self):
-        self._counter += 1
-        return struct.pack('!Q', self._counter) + self._blob
-