summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2013-12-25 15:11:05 +0000
committer Lars Wirzenius <liw@liw.fi> 2013-12-28 21:51:25 +0000
commit 94ba8e4a3bb26bdc868523e9132e0c0600ecd5ef (patch)
tree 617072732b94ec7c775b31e922666cf187fd2658
parent 38abe70b1b8872b7708fc1f0354f51bfc88caa79 (diff)
download obnam-94ba8e4a3bb26bdc868523e9132e0c0600ecd5ef.tar.gz
Have RepositoryInterface return all potential matches
-rw-r--r-- obnamlib/plugins/backup_plugin.py 2
-rw-r--r-- obnamlib/repo_dummy.py 9
-rw-r--r-- obnamlib/repo_fmt_6.py 8
-rw-r--r-- obnamlib/repo_interface.py 35
-rw-r--r-- test-gpghome/random_seed bin 600 -> 600 bytes
5 files changed, 36 insertions, 18 deletions
diff --git a/obnamlib/plugins/backup_plugin.py b/obnamlib/plugins/backup_plugin.py
index ae1c3709..1e39dc5a 100644
--- a/obnamlib/plugins/backup_plugin.py
+++ b/obnamlib/plugins/backup_plugin.py
@@ -761,7 +761,7 @@ class BackupPlugin(obnamlib.ObnamPlugin):
# exceptions, and other errors. We don't care: we'll just
# pretend no chunk with the checksum exists yet.
try:
- in_tree = [self.repo.find_chunk_id_by_content(data)]
+ in_tree = self.repo.find_chunk_ids_by_content(data)
except larch.Error:
in_tree = []
except obnamlib.RepositoryChunkContentNotInIndexes:
diff --git a/obnamlib/repo_dummy.py b/obnamlib/repo_dummy.py
index 4f9b462a..a6df73d8 100644
--- a/obnamlib/repo_dummy.py
+++ b/obnamlib/repo_dummy.py
@@ -449,10 +449,13 @@ class ChunkIndexes(object):
self.data.set_value(chunk_id, token_is_chunk_content)
def find_chunk(self, chunk_content):
+ chunk_ids = []
for chunk_id, stored_content in self.data.items():
if stored_content == chunk_content:
- return chunk_id
- raise obnamlib.RepositoryChunkContentNotInIndexes()
+ chunk_ids.append(chunk_id)
+ if not chunk_ids:
+ raise obnamlib.RepositoryChunkContentNotInIndexes()
+ return chunk_ids
def remove_chunk(self, chunk_id, client_id):
self._require_lock()
@@ -659,7 +662,7 @@ class RepositoryFormatDummy(obnamlib.RepositoryInterface):
def put_chunk_into_indexes(self, chunk_id, token, client_id):
self._chunk_indexes.put_chunk(chunk_id, token, client_id)
- def find_chunk_id_by_content(self, chunk_content):
+ def find_chunk_ids_by_content(self, chunk_content):
return self._chunk_indexes.find_chunk(chunk_content)
def remove_chunk_from_indexes(self, chunk_id, client_id):
diff --git a/obnamlib/repo_fmt_6.py b/obnamlib/repo_fmt_6.py
index e40f2c75..e6c75173 100644
--- a/obnamlib/repo_fmt_6.py
+++ b/obnamlib/repo_fmt_6.py
@@ -674,13 +674,11 @@ class RepositoryFormat6(obnamlib.RepositoryInterface):
self._chunksums.remove(checksum, chunk_id, client_id)
self._chunklist.remove(chunk_id)
- def find_chunk_id_by_content(self, data):
+ def find_chunk_ids_by_content(self, data):
checksum = self._checksum(data)
candidates = self._chunksums.find(checksum)
- for chunk_id in candidates:
- chunk_data = self.get_chunk_content(chunk_id)
- if chunk_data == data:
- return chunk_id
+ if candidates:
+ return candidates
raise obnamlib.RepositoryChunkContentNotInIndexes()
def validate_chunk_content(self, chunk_id):
diff --git a/obnamlib/repo_interface.py b/obnamlib/repo_interface.py
index b20a6f59..f4ab7531 100644
--- a/obnamlib/repo_interface.py
+++ b/obnamlib/repo_interface.py
@@ -722,13 +722,15 @@ class RepositoryInterface(object):
'''Removes a chunk from indexes, given its id, for a given client.'''
raise NotImplementedError()
- def find_chunk_id_by_content(self, data):
- '''Finds a chunk id given its content.
+ def find_chunk_ids_by_content(self, data):
+ '''Finds chunk ids that probably match a given content.
This will raise RepositoryChunkContentNotInIndexes if the
- chunk is not in the indexes. Otherwise it will return one
- chunk id that has exactly the same content. If the indexes
- contain duplicate chunks, any one of the might be returned.
+ chunk is not in the indexes. Otherwise it will return all
+ chunk ids that would have the same token (see
+ prepare_chunk_for_indexes). Note that the chunks whose ids are
+ returned do not necessarily match the given data; if the
+ caller cares, they need to verify.
'''
raise NotImplementedError()
@@ -1738,7 +1740,22 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover
token = self.repo.prepare_chunk_for_indexes('foochunk')
self.repo.put_chunk_into_indexes(chunk_id, token, 'fooclient')
self.assertEqual(
- self.repo.find_chunk_id_by_content('foochunk'), chunk_id)
+ self.repo.find_chunk_ids_by_content('foochunk'), [chunk_id])
+
+ def test_finds_all_matching_chunk_ids(self):
+ self.setup_client()
+ token = self.repo.prepare_chunk_for_indexes('foochunk')
+ self.repo.lock_chunk_indexes()
+
+ chunk_id_1 = self.repo.put_chunk_content('foochunk')
+ self.repo.put_chunk_into_indexes(chunk_id_1, token, 'fooclient')
+
+ chunk_id_2 = self.repo.put_chunk_content('foochunk')
+ self.repo.put_chunk_into_indexes(chunk_id_2, token, 'fooclient')
+
+ self.assertEqual(
+ set(self.repo.find_chunk_ids_by_content('foochunk')),
+ set([chunk_id_1, chunk_id_2]))
def test_removes_chunk_from_indexes(self):
self.setup_client()
@@ -1749,7 +1766,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover
self.repo.remove_chunk_from_indexes(chunk_id, 'fooclient')
self.assertRaises(
obnamlib.RepositoryChunkContentNotInIndexes,
- self.repo.find_chunk_id_by_content, 'foochunk')
+ self.repo.find_chunk_ids_by_content, 'foochunk')
def test_putting_chunk_to_indexes_without_locking_them_fails(self):
chunk_id = self.repo.put_chunk_content('foochunk')
@@ -1779,7 +1796,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover
self.repo.unlock_chunk_indexes()
self.assertRaises(
obnamlib.RepositoryChunkContentNotInIndexes,
- self.repo.find_chunk_id_by_content, 'foochunk')
+ self.repo.find_chunk_ids_by_content, 'foochunk')
def test_committing_chunk_indexes_remembers_changes(self):
self.setup_client()
@@ -1789,7 +1806,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover
self.repo.put_chunk_into_indexes(chunk_id, token, 'fooclient')
self.repo.commit_chunk_indexes()
self.assertEqual(
- self.repo.find_chunk_id_by_content('foochunk'), chunk_id)
+ self.repo.find_chunk_ids_by_content('foochunk'), [chunk_id])
def test_locking_chunk_indexes_twice_fails(self):
self.repo.lock_chunk_indexes()
diff --git a/test-gpghome/random_seed b/test-gpghome/random_seed
index 273edb2a..e03c7d57 100644
--- a/test-gpghome/random_seed
+++ b/test-gpghome/random_seed
Binary files differ