From 8125661b8af9b725d675faee982f6bd16f402aef Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 17 Mar 2016 18:23:43 +0200 Subject: Add whole file checksummer class This also adds MD5 to the checksummer.py file so we don't need to special case it. --- obnamlib/__init__.py | 14 ++++++- obnamlib/checksummer.py | 32 +++++++++------ obnamlib/checksummer_tests.py | 10 +++++ obnamlib/repo_interface.py | 4 ++ obnamlib/whole_file_checksummer.py | 62 +++++++++++++++++++++++++++++ obnamlib/whole_file_checksummer_tests.py | 67 ++++++++++++++++++++++++++++++++ 6 files changed, 176 insertions(+), 13 deletions(-) create mode 100644 obnamlib/whole_file_checksummer.py create mode 100644 obnamlib/whole_file_checksummer_tests.py diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py index 94527275..e1d58c53 100644 --- a/obnamlib/__init__.py +++ b/obnamlib/__init__.py @@ -73,8 +73,6 @@ from .encryption import ( EncryptionError, GpgError) -from .checksummer import checksum_algorithms, get_checksum_algorithm - from .hooks import ( Hook, MissingFilterError, NoFilterTagError, FilterHook, HookManager) from .pluginbase import ObnamPlugin @@ -152,9 +150,21 @@ from .repo_interface import ( REPO_FILE_DEV, REPO_FILE_INO, REPO_FILE_MD5, + REPO_FILE_SHA224, + REPO_FILE_SHA256, + REPO_FILE_SHA384, + REPO_FILE_SHA512, REPO_FILE_INTEGER_KEYS, metadata_file_key_mapping) +from .checksummer import ( + checksum_algorithms, + get_checksum_algorithm, + get_checksum_algorithm_name, +) + +from .whole_file_checksummer import WholeFileCheckSummer + from .delegator import RepositoryDelegator, GenerationId from .backup_progress import BackupProgress diff --git a/obnamlib/checksummer.py b/obnamlib/checksummer.py index 74ea35fc..f1c8e62f 100644 --- a/obnamlib/checksummer.py +++ b/obnamlib/checksummer.py @@ -21,21 +21,31 @@ import hashlib import obnamlib -_algorithms = { - 'sha224': hashlib.sha224, - 'sha256': hashlib.sha256, - 'sha384': hashlib.sha384, - 'sha512': hashlib.sha512, -} +_algorithm_list = [ 
+ ('md5', obnamlib.REPO_FILE_MD5, hashlib.md5), + ('sha224', obnamlib.REPO_FILE_SHA224, hashlib.sha224), + ('sha256', obnamlib.REPO_FILE_SHA256, hashlib.sha256), + ('sha384', obnamlib.REPO_FILE_SHA384, hashlib.sha384), + ('sha512', obnamlib.REPO_FILE_SHA512, hashlib.sha512), +] -checksum_algorithms = _algorithms.keys() +checksum_algorithms = [name for name, _, _ in _algorithm_list] -def get_checksum_algorithm(name): - if name in _algorithms: - return _algorithms[name]() - raise UnknownChecksumAlgorithm(algorithm=name) +def get_checksum_algorithm(wanted): + for name, _, func in _algorithm_list: + if wanted == name: + return func() + raise UnknownChecksumAlgorithm(algorithm=wanted) + + +def get_checksum_algorithm_name(wanted_key): + for name, key, _ in _algorithm_list: + if key == wanted_key: + return name + raise UnknownChecksumAlgorithm( + algorithm=obnamlib.repo_key_name(wanted_key)) class UnknownChecksumAlgorithm(obnamlib.ObnamError): diff --git a/obnamlib/checksummer_tests.py b/obnamlib/checksummer_tests.py index 10473740..a6c345f4 100644 --- a/obnamlib/checksummer_tests.py +++ b/obnamlib/checksummer_tests.py @@ -31,6 +31,16 @@ class TestGetChecksummer(unittest.TestCase): self.assertRaises( obnamlib.ObnamError, obnamlib.get_checksum_algorithm, 'unknown') + def test_knows_file_key_for_sha512(self): + self.assertEqual( + obnamlib.get_checksum_algorithm_name(obnamlib.REPO_FILE_SHA512), + 'sha512') + + def test_raises_error_if_algorithm_is_unknown_for_key(self): + self.assertRaises( + obnamlib.ObnamError, + obnamlib.get_checksum_algorithm_name, -1) + def test_returns_working_sha512(self): summer = obnamlib.get_checksum_algorithm('sha512') summer.update('hello, world') diff --git a/obnamlib/repo_interface.py b/obnamlib/repo_interface.py index 487d8672..6ab93ccd 100644 --- a/obnamlib/repo_interface.py +++ b/obnamlib/repo_interface.py @@ -56,6 +56,10 @@ REPO_FILE_GROUPNAME = _get_next_id() REPO_FILE_SYMLINK_TARGET = _get_next_id() REPO_FILE_XATTR_BLOB = _get_next_id() 
REPO_FILE_MD5 = _get_next_id() +REPO_FILE_SHA224 = _get_next_id() +REPO_FILE_SHA256 = _get_next_id() +REPO_FILE_SHA384 = _get_next_id() +REPO_FILE_SHA512 = _get_next_id() _MAX_STRING_KEY = REPO_FILE_MD5 diff --git a/obnamlib/whole_file_checksummer.py b/obnamlib/whole_file_checksummer.py new file mode 100644 index 00000000..517c09a1 --- /dev/null +++ b/obnamlib/whole_file_checksummer.py @@ -0,0 +1,62 @@ +# Copyright 2016 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# =*= License: GPL-3+ =*= + + +import hashlib + +import obnamlib + + +class WholeFileCheckSummer(object): + + '''Compute a whole-file checksum. + + Ask the repository its preferred checksum algorithm. Use that. + + If the algorithm is MD5, compute the checksum from all the bytes + in the file. For everything else, compute the checksum from (size, + checksum) pairs for all the chunks in the file. This convoluted + thing is because the latter is necessary for speed, and the former + is necessary for backwards compatibility. 
+ + ''' + + def __init__(self, repo): + self._summer = self._create_checksum_algorithm(repo) + + def _create_checksum_algorithm(self, repo): + file_key = repo.get_file_checksum_key() + if file_key is None: + return _NullChecksum() + name = obnamlib.get_checksum_algorithm_name(file_key) + return obnamlib.get_checksum_algorithm(name) + + def append_chunk(self, chunk_data, token): + self._summer.update(chunk_data) + + def get_checksum(self): + '''Get the current whole-file checksum.''' + return self._summer.hexdigest() + + +class _NullChecksum(object): + + def update(self, data): + pass + + def hexdigest(self): + return None diff --git a/obnamlib/whole_file_checksummer_tests.py b/obnamlib/whole_file_checksummer_tests.py new file mode 100644 index 00000000..c2addbec --- /dev/null +++ b/obnamlib/whole_file_checksummer_tests.py @@ -0,0 +1,67 @@ +# Copyright 2016 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +# =*= License: GPL-3+ =*= + + +import unittest + +import obnamlib + + +class WholeFileCheckSummerTests(unittest.TestCase): + + def test_computes_nothing_if_repo_wants_no_checksum(self): + repo = FakeRepository(None) + summer = obnamlib.WholeFileCheckSummer(repo) + chunk = 'hello' + token = repo.prepare_chunk_for_indexes(chunk) + summer.append_chunk(chunk, token) + self.assertEqual(summer.get_checksum(), None) + + def test_computes_checksum_for_md5(self): + repo = FakeRepository(obnamlib.REPO_FILE_MD5) + summer = obnamlib.WholeFileCheckSummer(repo) + chunk = 'hello' + token = repo.prepare_chunk_for_indexes(chunk) + summer.append_chunk(chunk, token) + self.assertEqual( + summer.get_checksum(), + '5d41402abc4b2a76b9719d911017c592') + + def test_computes_checksum_for_sha512(self): + repo = FakeRepository(obnamlib.REPO_FILE_SHA512) + summer = obnamlib.WholeFileCheckSummer(repo) + chunk = 'hello' + token = repo.prepare_chunk_for_indexes(chunk) + summer.append_chunk(chunk, token) + self.assertEqual( + summer.get_checksum(), + '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca7' + '2323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043') + + +class FakeRepository(object): + + def __init__(self, file_key): + self._file_key = file_key + + def get_file_checksum_key(self): + return self._file_key + + def prepare_chunk_for_indexes(self, data): + if self._file_key is None: + return None + return 'fake checksum' -- cgit v1.2.1