From 4210f10e6e786d8908f3d9872edf3cd61b9d224c Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 17 Mar 2016 19:47:46 +0200 Subject: Compute faster whole-file checksum for non-MD5 --- obnamlib/whole_file_checksummer.py | 9 +++++++-- obnamlib/whole_file_checksummer_tests.py | 13 ++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/obnamlib/whole_file_checksummer.py b/obnamlib/whole_file_checksummer.py index b08655b9..0c49e05d 100644 --- a/obnamlib/whole_file_checksummer.py +++ b/obnamlib/whole_file_checksummer.py @@ -36,6 +36,7 @@ class WholeFileCheckSummer(object): ''' def __init__(self, file_key): + self._all_bytes = file_key == obnamlib.REPO_FILE_MD5 self._summer = self._create_checksum_algorithm(file_key) def _create_checksum_algorithm(self, file_key): @@ -44,8 +45,12 @@ class WholeFileCheckSummer(object): name = obnamlib.get_checksum_algorithm_name(file_key) return obnamlib.get_checksum_algorithm(name) - def append_chunk(self, chunk_data, token): - self._summer.update(chunk_data) + def append_chunk(self, chunk_data, chunk_id): + if self._all_bytes: + self._summer.update(chunk_data) + else: + thing = '{},{};'.format(len(chunk_data), chunk_id) + self._summer.update(thing) def get_checksum(self): '''Get the current whole-file checksum.''' diff --git a/obnamlib/whole_file_checksummer_tests.py b/obnamlib/whole_file_checksummer_tests.py index c06becab..0b9c7b9f 100644 --- a/obnamlib/whole_file_checksummer_tests.py +++ b/obnamlib/whole_file_checksummer_tests.py @@ -16,6 +16,7 @@ # =*= License: GPL-3+ =*= +import hashlib import unittest import obnamlib @@ -36,6 +37,7 @@ class WholeFileCheckSummerTests(unittest.TestCase): summer = obnamlib.WholeFileCheckSummer(obnamlib.REPO_FILE_MD5) chunk = 'hello' token = repo.prepare_chunk_for_indexes(chunk) + chunk_id = None summer.append_chunk(chunk, token) self.assertEqual( summer.get_checksum(), @@ -46,11 +48,12 @@ class WholeFileCheckSummerTests(unittest.TestCase): summer = obnamlib.WholeFileCheckSummer(obnamlib.REPO_FILE_SHA512) chunk = 'hello' token = repo.prepare_chunk_for_indexes(chunk) - summer.append_chunk(chunk, token) - self.assertEqual( - summer.get_checksum(), - '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca7' - '2323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043') + chunk_id = '123' + summer.append_chunk(chunk, chunk_id) + + expected = hashlib.sha512('{},{};'.format(len(chunk), chunk_id)) + + self.assertEqual(summer.get_checksum(), expected.hexdigest()) class FakeRepository(object): -- cgit v1.2.1