Use dark's idea for generating uncompressible binary junk quickly.

author: Lars Wirzenius <liw@iki.fi> 2008-01-06 02:25:02 +0200
committer: Lars Wirzenius <liw@iki.fi> 2008-01-06 02:25:02 +0200
commit: 702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad (patch)
tree: 22001a2c2f701963d8837aaad915d9fb2564a184
parent: 09974c359d3d78cd215ff3e5c7faa55ba574dcaf (diff)
download: genbackupdata-702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad.tar.gz
2 files changed, 27 insertions, 18 deletions
diff --git a/genbackupdata.py b/genbackupdata.py
index ed41228..b6251bc 100644
--- a/genbackupdata.py
+++ b/genbackupdata.py
@@ -71,6 +71,15 @@ class BackupData:
         self._filename_counter = 0
         self._current_dir_no = 0
         self._next_filecount = 0
+        
+        # The zlib compression algorithm gives up if it gets a block of
+        # 32 KiB that it can't find in its dictionary. It completely
+        # ignores such a block, meaning that if the block is repeated,
+        # zlib ignores it every time. Most importantly for us, it doesn't
+        # compress the repeats, either. Thus, to generate lots of
+        # uncompressible binary data, we can generate one blob and
+        # repeat it. Thanks to Richard Braakman for the idea.
+        self._binary_blob_size = 64 * 1024 # Safety factor of 2
         self._binary_blob = None
 
     def set_directory(self, dirname):
@@ -249,7 +258,7 @@ class BackupData:
 
     def generate_binary_data_well(self, size):
         """Generate SIZE bytes of more or less random binary junk"""
-
+        
         # The following code has had some fine manual fine tuning done
         # to it. This has made it ugly, but faster. On a 1.2 MHz Intel
         # Pentium M, it generates around 6 MB/s.
@@ -274,14 +283,18 @@ class BackupData:
     
         return "".join(chunks)
 
-    generate_binary_data = generate_binary_data_well
-
-    def generate_binary_data_quickly(self, size):
-        """Generate SIZE bytes of binary junk, which may be compressible."""
+    def generate_binary_data(self, size):
+        """Generate SIZE bytes of binary junk.
+        
+        This is different from generate_binary_data_well in that
+        it makes use of _binary_blob (and generates that if it does
+        not yet exist).
+        
+        """
         
         if self._binary_blob is None:
             self._binary_blob = self.generate_binary_data_well(
-                                    self._chunk_size)
+                                    self._binary_blob_size)
         if size <= len(self._binary_blob):
             return self._binary_blob[:size]
         else:
diff --git a/tests.py b/tests.py
index 8b618fb..4466fc0 100755
--- a/tests.py
+++ b/tests.py
@@ -62,10 +62,6 @@ class BackupDataTests(unittest.TestCase):
         del self.bd
         self.remove_dir()
 
-    def testHasRightDefaultBinaryDataGenerator(self):
-        self.failUnlessEqual(self.bd.generate_binary_data,
-                             self.bd.generate_binary_data_well)
-
     def testSetsDirectoryCorrect(self):
         self.failUnlessEqual(self.bd.get_directory(), self.dirname)
 
@@ -221,20 +217,20 @@ class BackupDataTests(unittest.TestCase):
         self.failUnlessEqual(self.bd.generate_text_data(n * 2),
                              genbackupdata.LOREM_IPSUM * 2)
 
-    def testGeneratesRequestedAmountOfBinaryData(self):
-        n = 128
-        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
+    def testGeneratesRequestedAmountOfBinaryDataWell(self):
+        n = 37
+        self.failUnlessEqual(len(self.bd.generate_binary_data_well(n)), n)
 
     def testGeneratesRequestedAmountOfBinaryDataQuickly(self):
         n = 128
-        self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
 
-    def testGeneratesRequestedLargeAmountOfBinaryDataQuickly(self):
-        n = self.bd._chunk_size * 2 + 1
-        self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+    def testGeneratesRequestedLargeAmountOfBinaryData(self):
+        n = self.bd._binary_blob_size + 1
+        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
 
     def testGeneratesBinaryDataWhichDoesNotCompressWell(self):
-        n = 10 * 1024
+        n = self.bd._binary_blob_size * 4
         data = zlib.compress(self.bd.generate_binary_data(n))
         self.failUnless(len(data) > 0.95* n)
author	Lars Wirzenius <liw@iki.fi>	2008-01-06 02:25:02 +0200
committer	Lars Wirzenius <liw@iki.fi>	2008-01-06 02:25:02 +0200
commit	702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad (patch)
tree	22001a2c2f701963d8837aaad915d9fb2564a184
parent	09974c359d3d78cd215ff3e5c7faa55ba574dcaf (diff)
download	genbackupdata-702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad.tar.gz