From 45b3bd4949248e5fd0973ed3f04db86bbef5818e Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Thu, 17 Mar 2016 16:46:33 +0200
Subject: Add --checksum-algorithm setting

---
 obnamlib/__init__.py            |  4 +++-
 obnamlib/app.py                 | 13 +++++++++++
 obnamlib/checksummer.py         | 43 ++++++++++++++++++++++++++++++++++++
 obnamlib/checksummer_tests.py   | 48 +++++++++++++++++++++++++++++++++++++++++
 obnamlib/fmt_ga/format.py       |  8 +++++--
 obnamlib/fmt_ga/format_tests.py |  5 +++--
 obnamlib/fmt_ga/indexes.py      | 27 +++++++++++++++++------
 7 files changed, 137 insertions(+), 11 deletions(-)
 create mode 100644 obnamlib/checksummer.py
 create mode 100644 obnamlib/checksummer_tests.py

diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py
index ab4fb440..94527275 100644
--- a/obnamlib/__init__.py
+++ b/obnamlib/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2015 Lars Wirzenius
+# Copyright (C) 2009-2016 Lars Wirzenius
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -73,6 +73,8 @@ from .encryption import (
     EncryptionError,
     GpgError)
 
+from .checksummer import checksum_algorithms, get_checksum_algorithm
+
 from .hooks import (
     Hook, MissingFilterError, NoFilterTagError, FilterHook, HookManager)
 from .pluginbase import ObnamPlugin
diff --git a/obnamlib/app.py b/obnamlib/app.py
index 3a777257..d27aec67 100644
--- a/obnamlib/app.py
+++ b/obnamlib/app.py
@@ -92,6 +92,18 @@ class App(cliapp.Application):
             'use FORMAT for new repositories; one of "6", "green-albatross"',
             metavar='FORMAT')
 
+        algos = list(obnamlib.checksum_algorithms)
+        algos.remove('sha512')
+        algos = ['sha512'] + algos
+        self.settings.choice(
+            ['checksum-algorithm'],
+            algos,
+            'use CHECKSUM for checksum algorithm '
+            '(not for repository format 6); '
+            'one of: ' +
+            ', '.join(algos),
+            metavar='CHECKSUM')
+
         # Performance related settings.
 
         perf_group = obnamlib.option_group['perf']
@@ -260,6 +272,7 @@ class App(cliapp.Application):
             'chunk_bag_size': self.settings['chunk-bag-size'],
             'dir_cache_size': self.settings['dir-cache-size'],
             'dir_bag_size': self.settings['dir-bag-size'],
+            'checksum_algorithm': self.settings['checksum-algorithm'],
         }
 
         if create:
diff --git a/obnamlib/checksummer.py b/obnamlib/checksummer.py
new file mode 100644
index 00000000..74ea35fc
--- /dev/null
+++ b/obnamlib/checksummer.py
@@ -0,0 +1,43 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import hashlib
+
+import obnamlib
+
+
+_algorithms = {
+    'sha224': hashlib.sha224,
+    'sha256': hashlib.sha256,
+    'sha384': hashlib.sha384,
+    'sha512': hashlib.sha512,
+}
+
+
+checksum_algorithms = _algorithms.keys()
+
+
+def get_checksum_algorithm(name):
+    if name in _algorithms:
+        return _algorithms[name]()
+    raise UnknownChecksumAlgorithm(algorithm=name)
+
+
+class UnknownChecksumAlgorithm(obnamlib.ObnamError):
+
+    msg = 'Unknown checksum algorithm {algorithm}.'
diff --git a/obnamlib/checksummer_tests.py b/obnamlib/checksummer_tests.py
new file mode 100644
index 00000000..10473740
--- /dev/null
+++ b/obnamlib/checksummer_tests.py
@@ -0,0 +1,48 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import unittest
+
+import obnamlib
+
+
+class TestGetChecksummer(unittest.TestCase):
+
+    def test_knows_some_algorithms(self):
+        self.assertEqual(type(obnamlib.checksum_algorithms), list)
+        self.assertNotEqual(obnamlib.checksum_algorithms, [])
+
+    def test_raises_error_if_algorithm_is_unknown(self):
+        self.assertRaises(
+            obnamlib.ObnamError, obnamlib.get_checksum_algorithm, 'unknown')
+
+    def test_returns_working_sha512(self):
+        summer = obnamlib.get_checksum_algorithm('sha512')
+        summer.update('hello, world')
+        self.assertEqual(
+            summer.hexdigest(),
+            '8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f'
+            '44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9')
+
+    def test_every_algorithm_has_right_api(self):
+        for name in obnamlib.checksum_algorithms:
+            summer = obnamlib.get_checksum_algorithm(name)
+            summer.update('hello, world')
+            checksum = summer.hexdigest()
+            self.assertEqual(type(checksum), str)
+            self.assertNotEqual(checksum, '')
diff --git a/obnamlib/fmt_ga/format.py b/obnamlib/fmt_ga/format.py
index f05a31f2..dfda1cc0 100644
--- a/obnamlib/fmt_ga/format.py
+++ b/obnamlib/fmt_ga/format.py
@@ -1,4 +1,4 @@
-# Copyright 2015 Lars Wirzenius
+# Copyright 2015-2016 Lars Wirzenius
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -27,9 +27,13 @@ class RepositoryFormatGA(obnamlib.RepositoryDelegator):
         obnamlib.RepositoryDelegator.__init__(self, **kwargs)
 
         self.set_client_list_object(obnamlib.GAClientList())
-        self.set_chunk_indexes_object(obnamlib.GAChunkIndexes())
         self.set_client_factory(obnamlib.GAClient)
 
+        self.set_chunk_indexes_object(obnamlib.GAChunkIndexes())
+        assert 'checksum_algorithm' in kwargs
+        self._chunk_indexes.set_default_checksum_algorithm(
+            kwargs['checksum_algorithm'])
+
         chunk_store = obnamlib.GAChunkStore()
         if 'chunk_size' in kwargs: # pragma: no cover
             chunk_store.set_max_chunk_size(kwargs['chunk_size'])
diff --git a/obnamlib/fmt_ga/format_tests.py b/obnamlib/fmt_ga/format_tests.py
index 0f603a70..502a1348 100644
--- a/obnamlib/fmt_ga/format_tests.py
+++ b/obnamlib/fmt_ga/format_tests.py
@@ -1,4 +1,4 @@
-# Copyright 2015 Lars Wirzenius
+# Copyright 2015-2016 Lars Wirzenius
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -37,7 +37,8 @@ class RepositoryFormatGATests(obnamlib.RepositoryInterfaceTests):
             hooks=self.hooks,
             current_time=time.time,
             dir_bag_size=1,
-            dir_cache_size=0)
+            dir_cache_size=0,
+            checksum_algorithm='sha512')
         self.repo.set_fs(fs)
 
     def tearDown(self):
diff --git a/obnamlib/fmt_ga/indexes.py b/obnamlib/fmt_ga/indexes.py
index ab36ff97..a3986ec5 100644
--- a/obnamlib/fmt_ga/indexes.py
+++ b/obnamlib/fmt_ga/indexes.py
@@ -17,7 +17,6 @@
 
 
 import errno
-import hashlib
 import logging
 import os
 
@@ -30,12 +29,22 @@ class GAChunkIndexes(object):
 
     def __init__(self):
         self._fs = None
+        self._checksum_name = None
         self.set_dirname('chunk-indexes')
         self.clear()
 
     def set_fs(self, fs):
         self._fs = fs
 
+        # Load the data so that we can get the in-use checksum
+        # algorithm at once, before we use the default, just in case
+        # they're different.
+        self._load_data()
+
+    def set_default_checksum_algorithm(self, name):
+        if self._checksum_name is None:
+            self._checksum_name = name
+
     def set_dirname(self, dirname):
         self._dirname = dirname
 
@@ -74,7 +83,7 @@ class GAChunkIndexes(object):
                     'by_chunk_id': {
                     },
                     'by_checksum': {
-                        'sha512': {},
+                        self._checksum_name: {},
                     },
                     'used_by': {
                     },
@@ -83,13 +92,19 @@ class GAChunkIndexes(object):
                 self._data = obnamlib.deserialise_object(blob)
                 assert self._data is not None
 
+            keys = self._data['by_checksum'].keys()
+            assert len(keys) == 1
+            self._checksum_name = keys[0]
+
             self._data_is_loaded = True
 
     def _get_filename(self):
         return os.path.join(self.get_dirname(), 'data.dat')
 
     def prepare_chunk_for_indexes(self, chunk_content):
-        return hashlib.sha512(chunk_content).hexdigest()
+        summer = obnamlib.get_checksum_algorithm(self._checksum_name)
+        summer.update(chunk_content)
+        return summer.hexdigest()
 
     def put_chunk_into_indexes(self, chunk_id, token, client_id):
         self._load_data()
@@ -97,7 +112,7 @@ class GAChunkIndexes(object):
         by_chunk_id = self._data['by_chunk_id']
         by_chunk_id[chunk_id] = token
 
-        by_checksum = self._data['by_checksum']['sha512']
+        by_checksum = self._data['by_checksum'][self._checksum_name]
         chunk_ids = by_checksum.get(token, [])
         if chunk_id not in chunk_ids:
             chunk_ids.append(chunk_id)
@@ -112,7 +127,7 @@ class GAChunkIndexes(object):
     def find_chunk_ids_by_token(self, token):
         self._load_data()
 
-        by_checksum = self._data['by_checksum']['sha512']
+        by_checksum = self._data['by_checksum'][self._checksum_name]
         result = by_checksum.get(token, [])
 
         if not result:
@@ -153,7 +168,7 @@ class GAChunkIndexes(object):
         return token
 
     def _remove_chunk_by_checksum(self, chunk_id, token):
-        by_checksum = self._data['by_checksum']['sha512']
+        by_checksum = self._data['by_checksum'][self._checksum_name]
         chunk_ids = by_checksum.get(token, [])
         if chunk_id in chunk_ids:
             chunk_ids.remove(chunk_id)
-- 
cgit v1.2.1