summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2011-01-02 13:55:43 +0000
committerLars Wirzenius <liw@liw.fi>2011-01-02 13:55:43 +0000
commited86ea775f6711d091500c0f08fba2b282fdfc42 (patch)
tree6e61f1af83ec1f1d3e78c2c866dcb080b4097eac
parenta4ac69a5ef3e645e1814f73b075e55e69308487a (diff)
downloadgenbackupdata-ed86ea775f6711d091500c0f08fba2b282fdfc42.tar.gz
Implement DataGenerator.
-rw-r--r--genbackupdatalib/generator.py40
1 file changed, 38 insertions, 2 deletions
diff --git a/genbackupdatalib/generator.py b/genbackupdatalib/generator.py
index 58ad03d..8cf349c 100644
--- a/genbackupdatalib/generator.py
+++ b/genbackupdatalib/generator.py
@@ -14,13 +14,49 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import random
+import struct
+
+
class DataGenerator(object):

    '''Generate a deterministic stream of pseudo-random binary data.

    The same seed always produces the same byte stream, so test data
    can be regenerated reproducibly.
    '''

    # We generate data by using a blob of suitable size. The output
    # sequence repeats the blob, where each repetition is preceded by
    # a 64-bit counter, so no two repetitions are byte-identical.
    #
    # We need to be relatively prime with obnam's chunk size, which
    # defaults to 64 KiB (65536 bytes). This is so that obnam does not
    # notice a lot of duplicated data, resulting in unrealistically
    # high amounts of compression in the backup store.
    #
    # Ideally, we would not generate any repeating data, but the random
    # number generator is not fast enough for that. We need to generate
    # data about as fast as the disk can write it, and the random number
    # generator is orders of magnitude slower than that.

    # 1021 is prime, hence relatively prime with 65536. The original
    # code assigned 65521 and then immediately re-assigned 1021; the
    # first assignment was dead code and has been removed. The
    # effective runtime value (1021) is kept unchanged.
    _blob_size = 1021

    def __init__(self, seed):
        '''Initialize generator; seed feeds a private random.Random.'''
        self._random = random.Random(seed)
        self._blob = self._generate_blob()
        # Counter of blob repetitions emitted so far.
        self._counter = 0
        # Bytes generated but not yet handed out by generate().
        self._buffer = b''

    def _generate_blob(self):
        '''Return _blob_size pseudo-random bytes.

        bytes(bytearray(...)) builds the byte string in one pass and
        behaves identically on Python 2 and 3.
        '''
        return bytes(bytearray(self._random.randint(0, 255)
                               for i in range(self._blob_size)))

    def generate(self, size):
        '''Return exactly size bytes of data (size may be zero).'''
        # Top up the buffer until it can satisfy the request, then
        # carve off the requested prefix.
        while size > len(self._buffer):
            self._buffer += self._generate_more_data()
        data = self._buffer[:size]
        self._buffer = self._buffer[size:]
        return data

    def _generate_more_data(self):
        '''Return the next repetition: 64-bit big-endian counter + blob.'''
        self._counter += 1
        return struct.pack('!Q', self._counter) + self._blob