summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2016-03-17 18:23:43 +0200
committer Lars Wirzenius <liw@liw.fi> 2016-03-17 20:54:19 +0200
commit 8125661b8af9b725d675faee982f6bd16f402aef (patch)
tree f2b198567f00a194f456def1f7d2bcd7ddc7b81a
parent 96d21a7b501e453376be19c426059e8c392a8173 (diff)
download obnam-8125661b8af9b725d675faee982f6bd16f402aef.tar.gz
Add whole file checksummer class
This also adds MD5 to the checksummer.py file so that MD5 no longer needs to be special-cased.
-rw-r--r-- obnamlib/__init__.py 14
-rw-r--r-- obnamlib/checksummer.py 32
-rw-r--r-- obnamlib/checksummer_tests.py 10
-rw-r--r-- obnamlib/repo_interface.py 4
-rw-r--r-- obnamlib/whole_file_checksummer.py 62
-rw-r--r-- obnamlib/whole_file_checksummer_tests.py 67
6 files changed, 176 insertions, 13 deletions
diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py
index 94527275..e1d58c53 100644
--- a/obnamlib/__init__.py
+++ b/obnamlib/__init__.py
@@ -73,8 +73,6 @@ from .encryption import (
EncryptionError,
GpgError)
-from .checksummer import checksum_algorithms, get_checksum_algorithm
-
from .hooks import (
Hook, MissingFilterError, NoFilterTagError, FilterHook, HookManager)
from .pluginbase import ObnamPlugin
@@ -152,9 +150,21 @@ from .repo_interface import (
REPO_FILE_DEV,
REPO_FILE_INO,
REPO_FILE_MD5,
+ REPO_FILE_SHA224,
+ REPO_FILE_SHA256,
+ REPO_FILE_SHA384,
+ REPO_FILE_SHA512,
REPO_FILE_INTEGER_KEYS,
metadata_file_key_mapping)
+from .checksummer import (
+ checksum_algorithms,
+ get_checksum_algorithm,
+ get_checksum_algorithm_name,
+)
+
+from .whole_file_checksummer import WholeFileCheckSummer
+
from .delegator import RepositoryDelegator, GenerationId
from .backup_progress import BackupProgress
diff --git a/obnamlib/checksummer.py b/obnamlib/checksummer.py
index 74ea35fc..f1c8e62f 100644
--- a/obnamlib/checksummer.py
+++ b/obnamlib/checksummer.py
@@ -21,21 +21,31 @@ import hashlib
import obnamlib
-_algorithms = {
- 'sha224': hashlib.sha224,
- 'sha256': hashlib.sha256,
- 'sha384': hashlib.sha384,
- 'sha512': hashlib.sha512,
-}
+_algorithm_list = [
+ ('md5', obnamlib.REPO_FILE_MD5, hashlib.md5),
+ ('sha224', obnamlib.REPO_FILE_SHA224, hashlib.sha224),
+ ('sha256', obnamlib.REPO_FILE_SHA256, hashlib.sha256),
+ ('sha384', obnamlib.REPO_FILE_SHA384, hashlib.sha384),
+ ('sha512', obnamlib.REPO_FILE_SHA512, hashlib.sha512),
+]
-checksum_algorithms = _algorithms.keys()
+checksum_algorithms = [name for name, _, _ in _algorithm_list]
-def get_checksum_algorithm(name):
- if name in _algorithms:
- return _algorithms[name]()
- raise UnknownChecksumAlgorithm(algorithm=name)
+def get_checksum_algorithm(wanted):
+ for name, _, func in _algorithm_list:
+ if wanted == name:
+ return func()
+ raise UnknownChecksumAlgorithm(algorithm=wanted)
+
+
+def get_checksum_algorithm_name(wanted_key):
+ for name, key, _ in _algorithm_list:
+ if key == wanted_key:
+ return name
+ raise UnknownChecksumAlgorithm(
+ algorithm=obnamlib.repo_key_name(wanted_key))
class UnknownChecksumAlgorithm(obnamlib.ObnamError):
diff --git a/obnamlib/checksummer_tests.py b/obnamlib/checksummer_tests.py
index 10473740..a6c345f4 100644
--- a/obnamlib/checksummer_tests.py
+++ b/obnamlib/checksummer_tests.py
@@ -31,6 +31,16 @@ class TestGetChecksummer(unittest.TestCase):
self.assertRaises(
obnamlib.ObnamError, obnamlib.get_checksum_algorithm, 'unknown')
+ def test_knows_file_key_for_sha512(self):
+ self.assertEqual(
+ obnamlib.get_checksum_algorithm_name(obnamlib.REPO_FILE_SHA512),
+ 'sha512')
+
+ def test_raises_error_if_algorithm_is_unknown_for_key(self):
+ self.assertRaises(
+ obnamlib.ObnamError,
+ obnamlib.get_checksum_algorithm_name, -1)
+
def test_returns_working_sha512(self):
summer = obnamlib.get_checksum_algorithm('sha512')
summer.update('hello, world')
diff --git a/obnamlib/repo_interface.py b/obnamlib/repo_interface.py
index 487d8672..6ab93ccd 100644
--- a/obnamlib/repo_interface.py
+++ b/obnamlib/repo_interface.py
@@ -56,6 +56,10 @@ REPO_FILE_GROUPNAME = _get_next_id()
REPO_FILE_SYMLINK_TARGET = _get_next_id()
REPO_FILE_XATTR_BLOB = _get_next_id()
REPO_FILE_MD5 = _get_next_id()
+REPO_FILE_SHA224 = _get_next_id()
+REPO_FILE_SHA256 = _get_next_id()
+REPO_FILE_SHA384 = _get_next_id()
+REPO_FILE_SHA512 = _get_next_id()
_MAX_STRING_KEY = REPO_FILE_MD5
diff --git a/obnamlib/whole_file_checksummer.py b/obnamlib/whole_file_checksummer.py
new file mode 100644
index 00000000..517c09a1
--- /dev/null
+++ b/obnamlib/whole_file_checksummer.py
@@ -0,0 +1,62 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import hashlib
+
+import obnamlib
+
+
+class WholeFileCheckSummer(object):
+
+ '''Compute a whole-file checksum.
+
+ Ask the repository its preferred checksum algorithm. Use that.
+
+ If the algorithm is MD5, compute the checksum from all the bytes
+ in the file. For everything else, compute the checksum from (size,
+ checksum) pairs for all the chunks in the file. This convoluted
+ thing is because the latter is necessary for speed, and the former
+ is necessary for backwards compatibility.
+
+ '''
+
+ def __init__(self, repo):
+ self._summer = self._create_checksum_algorithm(repo)
+
+ def _create_checksum_algorithm(self, repo):
+ file_key = repo.get_file_checksum_key()
+ if file_key is None:
+ return _NullChecksum()
+ name = obnamlib.get_checksum_algorithm_name(file_key)
+ return obnamlib.get_checksum_algorithm(name)
+
+ def append_chunk(self, chunk_data, token):
+ self._summer.update(chunk_data)
+
+ def get_checksum(self):
+ '''Get the current whole-file checksum.'''
+ return self._summer.hexdigest()
+
+
+class _NullChecksum(object):
+
+ def update(self, data):
+ pass
+
+ def hexdigest(self):
+ return None
diff --git a/obnamlib/whole_file_checksummer_tests.py b/obnamlib/whole_file_checksummer_tests.py
new file mode 100644
index 00000000..c2addbec
--- /dev/null
+++ b/obnamlib/whole_file_checksummer_tests.py
@@ -0,0 +1,67 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import unittest
+
+import obnamlib
+
+
+class WholeFileCheckSummerTests(unittest.TestCase):
+
+ def test_computes_nothing_if_repo_wants_no_checksum(self):
+ repo = FakeRepository(None)
+ summer = obnamlib.WholeFileCheckSummer(repo)
+ chunk = 'hello'
+ token = repo.prepare_chunk_for_indexes(chunk)
+ summer.append_chunk(chunk, token)
+ self.assertEqual(summer.get_checksum(), None)
+
+ def test_computes_checksum_for_md5(self):
+ repo = FakeRepository(obnamlib.REPO_FILE_MD5)
+ summer = obnamlib.WholeFileCheckSummer(repo)
+ chunk = 'hello'
+ token = repo.prepare_chunk_for_indexes(chunk)
+ summer.append_chunk(chunk, token)
+ self.assertEqual(
+ summer.get_checksum(),
+ '5d41402abc4b2a76b9719d911017c592')
+
+ def test_computes_checksum_for_sha512(self):
+ repo = FakeRepository(obnamlib.REPO_FILE_SHA512)
+ summer = obnamlib.WholeFileCheckSummer(repo)
+ chunk = 'hello'
+ token = repo.prepare_chunk_for_indexes(chunk)
+ summer.append_chunk(chunk, token)
+ self.assertEqual(
+ summer.get_checksum(),
+ '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca7'
+ '2323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043')
+
+
+class FakeRepository(object):
+
+ def __init__(self, file_key):
+ self._file_key = file_key
+
+ def get_file_checksum_key(self):
+ return self._file_key
+
+ def prepare_chunk_for_indexes(self, data):
+ if self._file_key is None:
+ return None
+ return 'fake checksum'