author     Lars Wirzenius <liw@iki.fi>    2008-01-06 02:25:02 +0200
committer  Lars Wirzenius <liw@iki.fi>    2008-01-06 02:25:02 +0200
commit     702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad (patch)
tree       22001a2c2f701963d8837aaad915d9fb2564a184
parent     09974c359d3d78cd215ff3e5c7faa55ba574dcaf (diff)
download   genbackupdata-702c72e39bf9f4e03f26a9ab9dcc88fd37bca2ad.tar.gz
Use dark's idea for generating uncompressible binary junk quickly.
-rw-r--r--   genbackupdata.py   25
-rwxr-xr-x   tests.py           20
2 files changed, 27 insertions, 18 deletions
diff --git a/genbackupdata.py b/genbackupdata.py
index ed41228..b6251bc 100644
--- a/genbackupdata.py
+++ b/genbackupdata.py
@@ -71,6 +71,15 @@ class BackupData:
         self._filename_counter = 0
         self._current_dir_no = 0
         self._next_filecount = 0
+
+        # The zlib compression algorithm gives up if it gets a block of
+        # 32 KiB bytes it can't find in its dictionary. It completely
+        # ignores such a block, meaning that if it is repeated, then
+        # it ignores it repeatedly. Most importantly for us, it doesn't
+        # compress the repeats, either. Thus, to generate lots of
+        # uncompressible binary data, we can generate a blob and repeat
+        # that. Thanks to Richard Braakman for the idea.
+        self._binary_blob_size = 64 * 1024 # Safety factor of 2
         self._binary_blob = None
 
     def set_directory(self, dirname):
@@ -249,7 +258,7 @@ class BackupData:
 
     def generate_binary_data_well(self, size):
         """Generate SIZE bytes of more or less random binary junk"""
-        
+
         # The following code has had some fine manual fine tuning done
         # to it. This has made it ugly, but faster. On a 1.2 MHz Intel
         # Pentium M, it generates around 6 MB/s.
@@ -274,14 +283,18 @@ class BackupData:
 
         return "".join(chunks)
 
-    generate_binary_data = generate_binary_data_well
-
-    def generate_binary_data_quickly(self, size):
-        """Generate SIZE bytes of binary junk, which may be compressible."""
+    def generate_binary_data(self, size):
+        """Generate SIZE bytes of binary junk.
+
+        This is different from generate_binary_data_well in that
+        it makes use of _binary_blob (and generates that if it does
+        not yet exist).
+
+        """
         if self._binary_blob is None:
             self._binary_blob = self.generate_binary_data_well(
-                self._chunk_size)
+                self._binary_blob_size)
         if size <= len(self._binary_blob):
             return self._binary_blob[:size]
         else:
diff --git a/tests.py b/tests.py
--- a/tests.py
+++ b/tests.py
@@ -62,10 +62,6 @@ class BackupDataTests(unittest.TestCase):
         del self.bd
         self.remove_dir()
 
-    def testHasRightDefaultBinaryDataGenerator(self):
-        self.failUnlessEqual(self.bd.generate_binary_data,
-                             self.bd.generate_binary_data_well)
-
     def testSetsDirectoryCorrect(self):
         self.failUnlessEqual(self.bd.get_directory(), self.dirname)
 
@@ -221,20 +217,20 @@ class BackupDataTests(unittest.TestCase):
         self.failUnlessEqual(self.bd.generate_text_data(n * 2),
                              genbackupdata.LOREM_IPSUM * 2)
 
-    def testGeneratesRequestedAmountOfBinaryData(self):
-        n = 128
-        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
+    def testGeneratesRequestedAmountOfBinaryDataWell(self):
+        n = 37
+        self.failUnlessEqual(len(self.bd.generate_binary_data_well(n)), n)
 
     def testGeneratesRequestedAmountOfBinaryDataQuickly(self):
         n = 128
-        self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
 
-    def testGeneratesRequestedLargeAmountOfBinaryDataQuickly(self):
-        n = self.bd._chunk_size * 2 + 1
-        self.failUnlessEqual(len(self.bd.generate_binary_data_quickly(n)), n)
+    def testGeneratesRequestedLargeAmountOfBinaryData(self):
+        n = self.bd._binary_blob_size + 1
+        self.failUnlessEqual(len(self.bd.generate_binary_data(n)), n)
 
     def testGeneratesBinaryDataWhichDoesNotCompressWell(self):
-        n = 10 * 1024
+        n = self.bd._binary_blob_size * 4
         data = zlib.compress(self.bd.generate_binary_data(n))
         self.failUnless(len(data) > 0.95* n)
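For context, a minimal standalone sketch (not part of the patch) of the trick the new comment describes: generate one blob of random bytes larger than deflate's 32 KiB window, then build arbitrary amounts of data by repeating it. Each repeat is 64 KiB away from the previous copy, outside the back-reference window, so zlib cannot exploit it and the data stays effectively uncompressible. Here os.urandom stands in for generate_binary_data_well, and the variable names are illustrative only.

    import os
    import zlib

    # Illustrative stand-in for generate_binary_data_well(): one blob of
    # genuinely random bytes, twice the size of deflate's 32 KiB window
    # (the "safety factor of 2" in the patch).
    blob_size = 64 * 1024
    blob = os.urandom(blob_size)

    # Cheap "generation": repeat the blob instead of producing new random
    # bytes. Every repeat sits 64 KiB behind the previous copy, beyond the
    # 32 KiB match distance, so zlib finds no long back-references.
    data = blob * 16                   # 1 MiB of junk, almost for free

    compressed = zlib.compress(data)
    print(len(data), len(compressed))  # compressed size stays close to the input size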