diff options
author | Lars Wirzenius <liw@iki.fi> | 2007-07-24 22:38:01 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@iki.fi> | 2007-07-24 22:38:01 +0300 |
commit | a1ea442cedd35f8c823a8e7df426c08e91254359 (patch) | |
tree | 48c35c8957dccb3f77dc9aeac38c60fb4ad566dc | |
parent | d98cfeb4f6892180b3522df33cfae6366864efb2 (diff) | |
download | genbackupdata-a1ea442cedd35f8c823a8e7df426c08e91254359.tar.gz |
Added test program to compare various algorithms.
-rw-r--r-- | binaryjunk.py | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/binaryjunk.py b/binaryjunk.py new file mode 100644 index 0000000..70e9e38 --- /dev/null +++ b/binaryjunk.py @@ -0,0 +1,209 @@ +# Generate incompressible random data in various ways, and measure speeds. + + +import random +import gc +import timeit +import md5 +import sha +import zlib + + +def randint(size): + """just call random.randint(0, 255)""" + bytes = [] + for i in range(size): + bytes.append(chr(random.randint(0, 255))) + return "".join(bytes) + + +def getrandbits(size): + """just call random.getrandbits(8)""" + bytes = [] + for i in range(size): + bytes.append(chr(random.getrandbits(8))) + return "".join(bytes) + + +def md5ofgetrandbits(size): + """catenate successive MD5 of random byte stream""" + chunks = [] + sum = md5.new() + while size > 0: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + size -= len(chunk) + if size < 0: + chunks[-1] = chunks[-1][:size] + return "".join(chunks) + + +def md5ofgetrandbits2(size): + """catenate successive MD5 of random byte stream""" + chunks = [] + sum = md5.new() + for i in range(size/16): + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + if len(chunks) * 16 < size: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk[:size % 16]) + return "".join(chunks) + + +def sha1ofgetrandbits(size): + """catenate successive SHA1 of random byte stream""" + chunks = [] + sum = sha.new() + while size > 0: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + size -= len(chunk) + if size < 0: + chunks[-1] = chunks[-1][:size] + return "".join(chunks) + + +def sha1ofgetrandbits2(size): + """catenate successive SHA1 of random byte stream""" + chunks = [] + sum = sha.new() + chunk_size = 20 + for i in range(size / chunk_size): + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + if size % chunk_size > 0: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk[:size % chunk_size]) + return "".join(chunks) + + +def md5ofrandomandstatic(size): + """MD5 first of random byte stream, then constant""" + chunks = [] + sum = md5.new() + + initial_size = 128 + while size > 0 and initial_size > 0: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + size -= len(chunk) + + while size > 0: + sum.update("a") + chunk = sum.digest() + chunks.append(chunk) + size -= len(chunk) + + if size < 0: + chunks[-1] = chunks[-1][:size] + + return "".join(chunks) + + +def md5ofrandomandstatic2(size): + """MD5 first of random byte stream, then constant""" + chunks = [] + sum = md5.new() + chunk_size = md5.digest_size + + initial_bytes = min(size, chunk_size * 8) + for i in range(initial_bytes / chunk_size): + sum.update(chr(random.getrandbits(8))) + chunks.append(sum.digest()) + + size -= len(chunks) * chunk_size + for i in range(size / chunk_size): + sum.update("a") + chunks.append(sum.digest()) + + if size % chunk_size > 0: + sum.update(chr(random.getrandbits(8))) + chunks.append(sum.digest()[:size % chunk_size]) + + return "".join(chunks) + + +def sha1ofrandomandstatic2(size): + """SHA1 first of random byte stream, then constant""" + chunks = [] + sum = sha.new() + chunk_size = 20 + + initial_bytes = min(size, 128) + for i in range(initial_bytes / chunk_size): + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk) + + size -= len(chunks) * chunk_size + for i in range(size / chunk_size): + sum.update("a") + chunk = sum.digest() + chunks.append(chunk) + + if size % chunk_size > 0: + sum.update(chr(random.getrandbits(8))) + chunk = sum.digest() + chunks.append(chunk[:size % chunk_size]) + + return "".join(chunks) + + +funcs = [ + randint, + getrandbits, + md5ofgetrandbits, + md5ofgetrandbits2, + sha1ofgetrandbits, + sha1ofgetrandbits2, + md5ofrandomandstatic, + md5ofrandomandstatic2, + sha1ofrandomandstatic2, + ] + + +def measure(func, block, count): + gc.collect() + timer = timeit.Timer(stmt='%s(%d)' % (func.func_name, block), + setup="from __main__ import %s" % func.func_name) + return min(timer.repeat(repeat=count, number=1)) + + +def check(func, block): + data = func(block) + assert len(data) == block, \ + "data is %d bytes, should be %d" % (len(data), block) + assert len(zlib.compress(data)) >= 0.9 * block, \ + "compressed data is %d bytes, should be at least %d" % \ + (len(zlib.compress(data)), 0.9 * block) + + +def main(): + block = 1024**2 + count = 10 + print "Measuring %d functions for generating uncompressible binary junk"%\ + len(funcs) + print "Each function generates %d times %d bytes" % (count, block) + print "This will take a while" + print + + namelen = max(len(func.func_name) for func in funcs) + + for func in funcs: + check(func, block) + secs = measure(func, block, count) + speed = block/secs/(1024**2) + print "%4.1f MB/s %-*s %s" % \ + (speed, namelen, func.func_name, func.__doc__) + + +if __name__ == "__main__": + main() |