author     Lars Wirzenius <liw@iki.fi>  2008-01-06 02:28:20 +0200
committer  Lars Wirzenius <liw@iki.fi>  2008-01-06 02:28:20 +0200
commit     f161fd5e5657c07e6e500b87aa66ff4c0f2a2e90 (patch)
tree       22001a2c2f701963d8837aaad915d9fb2564a184
parent     c516e99efde7142531b448fc4b939abedc258e65 (diff)
parent     702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad (diff)
download   genbackupdata-f161fd5e5657c07e6e500b87aa66ff4c0f2a2e90.tar.gz
Merged change to make binary junk generation fast, without a --bad-binary-data option.
-rw-r--r--  genbackupdata.py  | 38
-rwxr-xr-x  tests.py          | 31
2 files changed, 27 insertions(+), 42 deletions(-)
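The change this merge brings in relies on a property of zlib that the new comment in genbackupdata.py spells out: zlib can only back-reference data within its 32 KiB sliding window, so a random blob of 64 KiB repeated end to end never has an earlier copy of itself within reach, and the output stays effectively uncompressible. A minimal standalone sketch of the idea, independent of genbackupdata's own API (the names BLOB_SIZE, _blob and generate_junk are illustrative only, not part of the module):

    import os
    import zlib

    # zlib's back-reference window is 32 KiB; a 64 KiB blob keeps each
    # repeat farther back than the window reaches, so nothing matches.
    BLOB_SIZE = 64 * 1024

    _blob = os.urandom(BLOB_SIZE)   # generated once, then reused

    def generate_junk(size):
        """Return SIZE bytes of data that zlib cannot usefully compress."""
        if size <= BLOB_SIZE:
            return _blob[:size]
        full, rest = divmod(size, BLOB_SIZE)
        return _blob * full + _blob[:rest]

    if __name__ == "__main__":
        data = generate_junk(4 * BLOB_SIZE)
        packed = zlib.compress(data)
        # The "compressed" form is roughly as large as the input.
        print(len(data), len(packed))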
diff --git a/genbackupdata.py b/genbackupdata.py
index 934da96..b6251bc 100644
--- a/genbackupdata.py
+++ b/genbackupdata.py
@@ -71,11 +71,17 @@ class BackupData:
self._filename_counter = 0
self._current_dir_no = 0
self._next_filecount = 0
+
+    # zlib can only refer back to data within its 32 KiB sliding
+    # window (its "dictionary"). If we generate one blob of random
+    # data larger than that window and repeat it back to back, no
+    # repeat is ever close enough to an earlier copy to be matched,
+    # so the repeats are stored as literals, not compressed. Thus,
+    # to generate lots of uncompressible binary data cheaply, we
+    # generate one blob and repeat it. Thanks to Richard Braakman
+    # for the idea.
+ self._binary_blob_size = 64 * 1024 # Safety factor of 2
self._binary_blob = None
- def make_binary_data_generation_fast_but_bad(self):
- self.generate_binary_data = self.generate_binary_data_quickly
-
def set_directory(self, dirname):
"""Set the directory to be operated on
@@ -252,7 +258,7 @@ class BackupData:
def generate_binary_data_well(self, size):
"""Generate SIZE bytes of more or less random binary junk"""
-
+
# The following code has had some manual fine tuning done to it.
# This has made it ugly, but faster. On a 1.2 GHz Intel
# Pentium M, it generates around 6 MB/s.
@@ -277,14 +283,18 @@ class BackupData:
return "".join(chunks)
- generate_binary_data = generate_binary_data_well
-
- def generate_binary_data_quickly(self, size):
- """Generate SIZE bytes of binary junk, which may be compressible."""
+ def generate_binary_data(self, size):
+ """Generate SIZE bytes of binary junk.
+
+ This is different from generate_binary_data_well in that
+ it makes use of _binary_blob (and generates that if it does
+ not yet exist).
+
+ """
if self._binary_blob is None:
self._binary_blob = self.generate_binary_data_well(
- self._chunk_size)
+ self._binary_blob_size)
if size <= len(self._binary_blob):
return self._binary_blob[:size]
else:
@@ -469,13 +479,6 @@ class CommandLineParser:
metavar="SIZE",
help="Make new binary files be of size SIZE")
- p.add_option("--bad-binary-data",
- action="store_true",
- default=False,
- help="When generating binary data, generate it "
- "quickly, but in a way that does not make it "
- "uncompressible.")
-
p.add_option("-c", "--create",
action="store",
metavar="SIZE",
@@ -546,9 +549,6 @@ class CommandLineParser:
"""Parse command line arguments"""
options, args = self._parser.parse_args(args)
- if options.bad_binary_data:
- self._bd.make_binary_data_generation_fast_but_bad()
-
if options.seed:
self._bd.set_seed(int(options.seed))
diff --git a/tests.py b/tests.py
index edc0dd6..4466fc0 100755
--- a/tests.py
+++ b/tests.py
@@ -62,15 +62,6 @@ class BackupDataTests(unittest.TestCase):
del self.bd
self.remove_dir()
- def testHasRightDefaultBinaryDataGenerator(self):
- self.failUnlessEqual(self.bd.generate_binary_data,
- self.bd.generate_binary_data_well)
-
- def testSetsOtherBinaryDataGeneratorWhenRequested(self):
- self.bd.make_binary_data_generation_fast_but_bad()
- self.failUnlessEqual(self.bd.generate_binary_data,
- self.bd.generate_binary_data_quickly)
-
def testSetsDirectoryCorrect(self):
self.failUnlessEqual(self.bd.get_directory(), self.dirname)
@@ -226,20 +217,20 @@ class BackupDataTests(unittest.TestCase):
self.failUnlessEqual(self.bd.generate_text_data(n * 2),
genbackupdata.LOREM_IPSUM * 2)
- def testGeneratesRequestedAmountOfBinaryData(self):
- n = 128
- self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
+ def testGeneratesRequestedAmountOfBinaryDataWell(self):
+ n = 37
+ self.failUnlessEqual(len(self.bd.generate_binary_data_well(n)), n)
def testGeneratesRequestedAmountOfBinaryDataQuickly(self):
n = 128
- self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+ self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
- def testGeneratesRequestedLargeAmountOfBinaryDataQuickly(self):
- n = self.bd._chunk_size * 2 + 1
- self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+ def testGeneratesRequestedLargeAmountOfBinaryData(self):
+ n = self.bd._binary_blob_size + 1
+ self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
def testGeneratesBinaryDataWhichDoesNotCompressWell(self):
- n = 10 * 1024
+ n = self.bd._binary_blob_size * 4
data = zlib.compress(self.bd.generate_binary_data(n))
self.failUnless(len(data) > 0.95* n)
@@ -442,12 +433,6 @@ class CommandLineParserTests(unittest.TestCase):
self.failUnlessEqual(self.bd.get_binary_file_size(),
genbackupdata.TiB)
- def testSetsBinaryGeneratorWhenRequested(self):
- options, args = self.clp.parse(["--bad-binary-data"])
- self.failUnlessEqual(args, [])
- self.failUnlessEqual(self.bd.generate_binary_data,
- self.bd.generate_binary_data_quickly)
-
def testHandlesOptionForCreate(self):
options, args = self.clp.parse(["--create=1t"])
self.failUnlessEqual(args, [])
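
The "Safety factor of 2" on _binary_blob_size, and the 0.95 compression-ratio bound in testGeneratesBinaryDataWhichDoesNotCompressWell, both follow from zlib's 32 KiB window: a repeated blob that fits inside the window compresses heavily, while a 64 KiB blob does not. A rough demonstration, separate from genbackupdata itself (ratio is an illustrative helper, not part of the module):

    import os
    import zlib

    def ratio(blob_size, copies=8):
        """Compressed/original size for a random blob repeated `copies` times."""
        data = os.urandom(blob_size) * copies
        return float(len(zlib.compress(data))) / len(data)

    if __name__ == "__main__":
        # 16 KiB blob: each repeat starts within zlib's 32 KiB window,
        # so the repeats become back-references and the ratio is small.
        print("16 KiB blob:", ratio(16 * 1024))
        # 64 KiB blob: repeats are out of reach, ratio stays near 1.0.
        print("64 KiB blob:", ratio(64 * 1024))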