summaryrefslogtreecommitdiff
path: root/genbackupdatalib/generator.py
blob: 8cf349cde2d6925732abfbd3495c3207ccb781db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Copyright 2010  Lars Wirzenius
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import random
import struct


class DataGenerator(object):

    '''Generate a reproducible stream of pseudo-random binary data.

    The stream is fully determined by the seed given to the constructor:
    two generators created with the same seed produce the same bytes,
    regardless of how the output is split across generate() calls.

    '''

    # We generate data by using a blob of suitable size. The output
    # sequence repeats the blob, where each repetition is preceded by
    # a 64-bit counter (big-endian, starting at 1).
    #
    # The repetition period needs to be relatively prime with obnam's
    # chunk size, which defaults to 64 KiB (65536 bytes). This is so
    # that obnam does not notice a lot of duplicated data, resulting in
    # unrealistically high amounts of compression in the backup store.
    # With a 1021-byte blob the repeating unit is 8 + 1021 = 1029 bytes,
    # which is odd and therefore shares no factor with 65536.
    #
    # Ideally, we would not generate any repeating data, but the random
    # number generator is not fast enough for that. We need to generate
    # data about as fast as the disk can write it, and the random number
    # generator is orders of magnitude slower than that.

    # An earlier assignment of 65521 was immediately overridden by this
    # value, so 1021 has always been the size actually in effect.
    _blob_size = 1021

    def __init__(self, seed):
        '''Set up a generator whose output depends only on seed.

        seed is passed unchanged to random.Random.

        '''
        self._random = random.Random(seed)
        self._blob = self._generate_blob()
        self._counter = 0
        # Generated bytes that have not been handed out yet.
        self._buffer = b''

    def _generate_blob(self):
        '''Return _blob_size random bytes drawn from the seeded generator.'''
        # Going through bytearray yields a real byte string on both
        # Python 2 (where bytes is str) and Python 3; joining chr()
        # results would give text on Python 3, which cannot be
        # concatenated with the output of struct.pack.
        values = bytearray(self._random.randint(0, 255)
                           for i in range(self._blob_size))
        return bytes(values)

    def generate(self, size):
        '''Return the next size bytes of the stream as a byte string.

        Bytes generated beyond what is returned are kept and handed out
        first by the next call.

        '''
        available = len(self._buffer)
        if size > available:
            # Collect the pieces and join once, rather than re-copying
            # the growing buffer for every repetition that is appended.
            pieces = [self._buffer]
            while size > available:
                more = self._generate_more_data()
                pieces.append(more)
                available += len(more)
            self._buffer = b''.join(pieces)
        data = self._buffer[:size]
        self._buffer = self._buffer[size:]
        return data

    def _generate_more_data(self):
        '''Return the next repetition: an 8-byte counter plus the blob.'''
        self._counter += 1
        # '!Q' is an unsigned 64-bit integer in network (big-endian) order.
        return struct.pack('!Q', self._counter) + self._blob