summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@iki.fi> 2008-01-06 02:25:02 +0200
committer Lars Wirzenius <liw@iki.fi> 2008-01-06 02:25:02 +0200
commit 702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad (patch)
tree 22001a2c2f701963d8837aaad915d9fb2564a184
parent 09974c359d3d78cd215ff3e5c7faa55ba574dcaf (diff)
download genbackupdata-702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad.tar.gz
Use dark's idea for generating uncompressible binary junk quickly.
-rw-r--r-- genbackupdata.py 25
-rwxr-xr-x tests.py 20
2 files changed, 27 insertions, 18 deletions
diff --git a/genbackupdata.py b/genbackupdata.py
index ed41228..b6251bc 100644
--- a/genbackupdata.py
+++ b/genbackupdata.py
@@ -71,6 +71,15 @@ class BackupData:
self._filename_counter = 0
self._current_dir_no = 0
self._next_filecount = 0
+
+ # The zlib compression algorithm gives up if it gets a block of
+ # 32 KiB it can't find in its dictionary. It completely
+ # ignores such a block, meaning that if it is repeated, then
+ # it ignores it repeatedly. Most importantly for us, it doesn't
+ # compress the repeats, either. Thus, to generate lots of
+ # uncompressible binary data, we can generate a blob and repeat
+ # that. Thanks to Richard Braakman for the idea.
+ self._binary_blob_size = 64 * 1024 # Safety factor of 2
self._binary_blob = None
def set_directory(self, dirname):
@@ -249,7 +258,7 @@ class BackupData:
def generate_binary_data_well(self, size):
"""Generate SIZE bytes of more or less random binary junk"""
-
+
# The following code has had some manual fine-tuning done
# to it. This has made it ugly, but faster. On a 1.2 GHz Intel
# Pentium M, it generates around 6 MB/s.
@@ -274,14 +283,18 @@ class BackupData:
return "".join(chunks)
- generate_binary_data = generate_binary_data_well
-
- def generate_binary_data_quickly(self, size):
- """Generate SIZE bytes of binary junk, which may be compressible."""
+ def generate_binary_data(self, size):
+ """Generate SIZE bytes of binary junk.
+
+ This is different from generate_binary_data_well in that
+ it makes use of _binary_blob (and generates that if it does
+ not yet exist).
+
+ """
if self._binary_blob is None:
self._binary_blob = self.generate_binary_data_well(
- self._chunk_size)
+ self._binary_blob_size)
if size <= len(self._binary_blob):
return self._binary_blob[:size]
else:
diff --git a/tests.py b/tests.py
index 8b618fb..4466fc0 100755
--- a/tests.py
+++ b/tests.py
@@ -62,10 +62,6 @@ class BackupDataTests(unittest.TestCase):
del self.bd
self.remove_dir()
- def testHasRightDefaultBinaryDataGenerator(self):
- self.failUnlessEqual(self.bd.generate_binary_data,
- self.bd.generate_binary_data_well)
-
def testSetsDirectoryCorrect(self):
self.failUnlessEqual(self.bd.get_directory(), self.dirname)
@@ -221,20 +217,20 @@ class BackupDataTests(unittest.TestCase):
self.failUnlessEqual(self.bd.generate_text_data(n * 2),
genbackupdata.LOREM_IPSUM * 2)
- def testGeneratesRequestedAmountOfBinaryData(self):
- n = 128
- self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
+ def testGeneratesRequestedAmountOfBinaryDataWell(self):
+ n = 37
+ self.failUnlessEqual(len(self.bd.generate_binary_data_well(n)), n)
def testGeneratesRequestedAmountOfBinaryDataQuickly(self):
n = 128
- self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+ self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
- def testGeneratesRequestedLargeAmountOfBinaryDataQuickly(self):
- n = self.bd._chunk_size * 2 + 1
- self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+ def testGeneratesRequestedLargeAmountOfBinaryData(self):
+ n = self.bd._binary_blob_size + 1
+ self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
def testGeneratesBinaryDataWhichDoesNotCompressWell(self):
- n = 10 * 1024
+ n = self.bd._binary_blob_size * 4
data = zlib.compress(self.bd.generate_binary_data(n))
self.failUnless(len(data) > 0.95* n)