summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2016-03-17 16:46:33 +0200
committer Lars Wirzenius <liw@liw.fi> 2016-03-17 17:50:05 +0200
commit 45b3bd4949248e5fd0973ed3f04db86bbef5818e (patch)
tree c4342d6a4044a3f5e64679649850aedf2153e2ea
parent eec9750c81f0ba15a9d1a43efbdb85b3211f2daa (diff)
download obnam-45b3bd4949248e5fd0973ed3f04db86bbef5818e.tar.gz
Add --checksum-algorithm setting
-rw-r--r-- obnamlib/__init__.py 4
-rw-r--r-- obnamlib/app.py 13
-rw-r--r-- obnamlib/checksummer.py 43
-rw-r--r-- obnamlib/checksummer_tests.py 48
-rw-r--r-- obnamlib/fmt_ga/format.py 8
-rw-r--r-- obnamlib/fmt_ga/format_tests.py 5
-rw-r--r-- obnamlib/fmt_ga/indexes.py 27
7 files changed, 137 insertions, 11 deletions
diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py
index ab4fb440..94527275 100644
--- a/obnamlib/__init__.py
+++ b/obnamlib/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2015 Lars Wirzenius
+# Copyright (C) 2009-2016 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -73,6 +73,8 @@ from .encryption import (
EncryptionError,
GpgError)
+from .checksummer import checksum_algorithms, get_checksum_algorithm
+
from .hooks import (
Hook, MissingFilterError, NoFilterTagError, FilterHook, HookManager)
from .pluginbase import ObnamPlugin
diff --git a/obnamlib/app.py b/obnamlib/app.py
index 3a777257..d27aec67 100644
--- a/obnamlib/app.py
+++ b/obnamlib/app.py
@@ -92,6 +92,18 @@ class App(cliapp.Application):
'use FORMAT for new repositories; one of "6", "green-albatross"',
metavar='FORMAT')
+ algos = list(obnamlib.checksum_algorithms)
+ algos.remove('sha512')
+ algos = ['sha512'] + algos
+ self.settings.choice(
+ ['checksum-algorithm'],
+ algos,
+ 'use CHECKSUM for checksum algorithm '
+ '(not for repository format 6); '
+ 'one of: ' +
+ ', '.join(algos),
+ metavar='CHECKSUM')
+
# Performance related settings.
perf_group = obnamlib.option_group['perf']
@@ -260,6 +272,7 @@ class App(cliapp.Application):
'chunk_bag_size': self.settings['chunk-bag-size'],
'dir_cache_size': self.settings['dir-cache-size'],
'dir_bag_size': self.settings['dir-bag-size'],
+ 'checksum_algorithm': self.settings['checksum-algorithm'],
}
if create:
diff --git a/obnamlib/checksummer.py b/obnamlib/checksummer.py
new file mode 100644
index 00000000..74ea35fc
--- /dev/null
+++ b/obnamlib/checksummer.py
@@ -0,0 +1,43 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import hashlib
+
+import obnamlib
+
+
+_algorithms = {
+ 'sha224': hashlib.sha224,
+ 'sha256': hashlib.sha256,
+ 'sha384': hashlib.sha384,
+ 'sha512': hashlib.sha512,
+}
+
+
+checksum_algorithms = _algorithms.keys()
+
+
+def get_checksum_algorithm(name):
+ if name in _algorithms:
+ return _algorithms[name]()
+ raise UnknownChecksumAlgorithm(algorithm=name)
+
+
+class UnknownChecksumAlgorithm(obnamlib.ObnamError):
+
+ msg = 'Unknown checksum algorithm {algorithm}.'
diff --git a/obnamlib/checksummer_tests.py b/obnamlib/checksummer_tests.py
new file mode 100644
index 00000000..10473740
--- /dev/null
+++ b/obnamlib/checksummer_tests.py
@@ -0,0 +1,48 @@
+# Copyright 2016 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+import unittest
+
+import obnamlib
+
+
+class TestGetChecksummer(unittest.TestCase):
+
+ def test_knows_some_algorithms(self):
+ self.assertEqual(type(obnamlib.checksum_algorithms), list)
+ self.assertNotEqual(obnamlib.checksum_algorithms, [])
+
+ def test_raises_error_if_algorithm_is_unknown(self):
+ self.assertRaises(
+ obnamlib.ObnamError, obnamlib.get_checksum_algorithm, 'unknown')
+
+ def test_returns_working_sha512(self):
+ summer = obnamlib.get_checksum_algorithm('sha512')
+ summer.update('hello, world')
+ self.assertEqual(
+ summer.hexdigest(),
+ '8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f'
+ '44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9')
+
+ def test_every_algorithm_has_right_api(self):
+ for name in obnamlib.checksum_algorithms:
+ summer = obnamlib.get_checksum_algorithm(name)
+ summer.update('hello, world')
+ checksum = summer.hexdigest()
+ self.assertEqual(type(checksum), str)
+ self.assertNotEqual(checksum, '')
diff --git a/obnamlib/fmt_ga/format.py b/obnamlib/fmt_ga/format.py
index f05a31f2..dfda1cc0 100644
--- a/obnamlib/fmt_ga/format.py
+++ b/obnamlib/fmt_ga/format.py
@@ -1,4 +1,4 @@
-# Copyright 2015 Lars Wirzenius
+# Copyright 2015-2016 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,9 +27,13 @@ class RepositoryFormatGA(obnamlib.RepositoryDelegator):
obnamlib.RepositoryDelegator.__init__(self, **kwargs)
self.set_client_list_object(obnamlib.GAClientList())
- self.set_chunk_indexes_object(obnamlib.GAChunkIndexes())
self.set_client_factory(obnamlib.GAClient)
+ self.set_chunk_indexes_object(obnamlib.GAChunkIndexes())
+ assert 'checksum_algorithm' in kwargs
+ self._chunk_indexes.set_default_checksum_algorithm(
+ kwargs['checksum_algorithm'])
+
chunk_store = obnamlib.GAChunkStore()
if 'chunk_size' in kwargs: # pragma: no cover
chunk_store.set_max_chunk_size(kwargs['chunk_size'])
diff --git a/obnamlib/fmt_ga/format_tests.py b/obnamlib/fmt_ga/format_tests.py
index 0f603a70..502a1348 100644
--- a/obnamlib/fmt_ga/format_tests.py
+++ b/obnamlib/fmt_ga/format_tests.py
@@ -1,4 +1,4 @@
-# Copyright 2015 Lars Wirzenius
+# Copyright 2015-2016 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -37,7 +37,8 @@ class RepositoryFormatGATests(obnamlib.RepositoryInterfaceTests):
hooks=self.hooks,
current_time=time.time,
dir_bag_size=1,
- dir_cache_size=0)
+ dir_cache_size=0,
+ checksum_algorithm='sha512')
self.repo.set_fs(fs)
def tearDown(self):
diff --git a/obnamlib/fmt_ga/indexes.py b/obnamlib/fmt_ga/indexes.py
index ab36ff97..a3986ec5 100644
--- a/obnamlib/fmt_ga/indexes.py
+++ b/obnamlib/fmt_ga/indexes.py
@@ -17,7 +17,6 @@
import errno
-import hashlib
import logging
import os
@@ -30,12 +29,22 @@ class GAChunkIndexes(object):
def __init__(self):
self._fs = None
+ self._checksum_name = None
self.set_dirname('chunk-indexes')
self.clear()
def set_fs(self, fs):
self._fs = fs
+ # Load the data so that we can get the in-use checksum
+ # algorithm at once, before we use the default, just in case
+ # they're different.
+ self._load_data()
+
+ def set_default_checksum_algorithm(self, name):
+ if self._checksum_name is None:
+ self._checksum_name = name
+
def set_dirname(self, dirname):
self._dirname = dirname
@@ -74,7 +83,7 @@ class GAChunkIndexes(object):
'by_chunk_id': {
},
'by_checksum': {
- 'sha512': {},
+ self._checksum_name: {},
},
'used_by': {
},
@@ -83,13 +92,19 @@ class GAChunkIndexes(object):
self._data = obnamlib.deserialise_object(blob)
assert self._data is not None
+ keys = self._data['by_checksum'].keys()
+ assert len(keys) == 1
+ self._checksum_name = keys[0]
+
self._data_is_loaded = True
def _get_filename(self):
return os.path.join(self.get_dirname(), 'data.dat')
def prepare_chunk_for_indexes(self, chunk_content):
- return hashlib.sha512(chunk_content).hexdigest()
+ summer = obnamlib.get_checksum_algorithm(self._checksum_name)
+ summer.update(chunk_content)
+ return summer.hexdigest()
def put_chunk_into_indexes(self, chunk_id, token, client_id):
self._load_data()
@@ -97,7 +112,7 @@ class GAChunkIndexes(object):
by_chunk_id = self._data['by_chunk_id']
by_chunk_id[chunk_id] = token
- by_checksum = self._data['by_checksum']['sha512']
+ by_checksum = self._data['by_checksum'][self._checksum_name]
chunk_ids = by_checksum.get(token, [])
if chunk_id not in chunk_ids:
chunk_ids.append(chunk_id)
@@ -112,7 +127,7 @@ class GAChunkIndexes(object):
def find_chunk_ids_by_token(self, token):
self._load_data()
- by_checksum = self._data['by_checksum']['sha512']
+ by_checksum = self._data['by_checksum'][self._checksum_name]
result = by_checksum.get(token, [])
if not result:
@@ -153,7 +168,7 @@ class GAChunkIndexes(object):
return token
def _remove_chunk_by_checksum(self, chunk_id, token):
- by_checksum = self._data['by_checksum']['sha512']
+ by_checksum = self._data['by_checksum'][self._checksum_name]
chunk_ids = by_checksum.get(token, [])
if chunk_id in chunk_ids:
chunk_ids.remove(chunk_id)