diff options
author | Lars Wirzenius <liw@liw.fi> | 2013-12-25 15:11:05 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2013-12-28 21:51:25 +0000 |
commit | 94ba8e4a3bb26bdc868523e9132e0c0600ecd5ef (patch) | |
tree | 617072732b94ec7c775b31e922666cf187fd2658 | |
parent | 38abe70b1b8872b7708fc1f0354f51bfc88caa79 (diff) | |
download | obnam-94ba8e4a3bb26bdc868523e9132e0c0600ecd5ef.tar.gz |
Have RepositoryInterface return all potential matches
-rw-r--r-- | obnamlib/plugins/backup_plugin.py | 2 | ||||
-rw-r--r-- | obnamlib/repo_dummy.py | 9 | ||||
-rw-r--r-- | obnamlib/repo_fmt_6.py | 8 | ||||
-rw-r--r-- | obnamlib/repo_interface.py | 35 | ||||
-rw-r--r-- | test-gpghome/random_seed | bin | 600 -> 600 bytes |
5 files changed, 36 insertions, 18 deletions
diff --git a/obnamlib/plugins/backup_plugin.py b/obnamlib/plugins/backup_plugin.py index ae1c3709..1e39dc5a 100644 --- a/obnamlib/plugins/backup_plugin.py +++ b/obnamlib/plugins/backup_plugin.py @@ -761,7 +761,7 @@ class BackupPlugin(obnamlib.ObnamPlugin): # exceptions, and other errors. We don't care: we'll just # pretend no chunk with the checksum exists yet. try: - in_tree = [self.repo.find_chunk_id_by_content(data)] + in_tree = self.repo.find_chunk_ids_by_content(data) except larch.Error: in_tree = [] except obnamlib.RepositoryChunkContentNotInIndexes: diff --git a/obnamlib/repo_dummy.py b/obnamlib/repo_dummy.py index 4f9b462a..a6df73d8 100644 --- a/obnamlib/repo_dummy.py +++ b/obnamlib/repo_dummy.py @@ -449,10 +449,13 @@ class ChunkIndexes(object): self.data.set_value(chunk_id, token_is_chunk_content) def find_chunk(self, chunk_content): + chunk_ids = [] for chunk_id, stored_content in self.data.items(): if stored_content == chunk_content: - return chunk_id - raise obnamlib.RepositoryChunkContentNotInIndexes() + chunk_ids.append(chunk_id) + if not chunk_ids: + raise obnamlib.RepositoryChunkContentNotInIndexes() + return chunk_ids def remove_chunk(self, chunk_id, client_id): self._require_lock() @@ -659,7 +662,7 @@ class RepositoryFormatDummy(obnamlib.RepositoryInterface): def put_chunk_into_indexes(self, chunk_id, token, client_id): self._chunk_indexes.put_chunk(chunk_id, token, client_id) - def find_chunk_id_by_content(self, chunk_content): + def find_chunk_ids_by_content(self, chunk_content): return self._chunk_indexes.find_chunk(chunk_content) def remove_chunk_from_indexes(self, chunk_id, client_id): diff --git a/obnamlib/repo_fmt_6.py b/obnamlib/repo_fmt_6.py index e40f2c75..e6c75173 100644 --- a/obnamlib/repo_fmt_6.py +++ b/obnamlib/repo_fmt_6.py @@ -674,13 +674,11 @@ class RepositoryFormat6(obnamlib.RepositoryInterface): self._chunksums.remove(checksum, chunk_id, client_id) self._chunklist.remove(chunk_id) - def find_chunk_id_by_content(self, data): + 
def find_chunk_ids_by_content(self, data): checksum = self._checksum(data) candidates = self._chunksums.find(checksum) - for chunk_id in candidates: - chunk_data = self.get_chunk_content(chunk_id) - if chunk_data == data: - return chunk_id + if candidates: + return candidates raise obnamlib.RepositoryChunkContentNotInIndexes() def validate_chunk_content(self, chunk_id): diff --git a/obnamlib/repo_interface.py b/obnamlib/repo_interface.py index b20a6f59..f4ab7531 100644 --- a/obnamlib/repo_interface.py +++ b/obnamlib/repo_interface.py @@ -722,13 +722,15 @@ class RepositoryInterface(object): '''Removes a chunk from indexes, given its id, for a given client.''' raise NotImplementedError() - def find_chunk_id_by_content(self, data): - '''Finds a chunk id given its content. + def find_chunk_ids_by_content(self, data): + '''Finds chunk ids that probably match a given content. This will raise RepositoryChunkContentNotInIndexes if the - chunk is not in the indexes. Otherwise it will return one - chunk id that has exactly the same content. If the indexes - contain duplicate chunks, any one of the might be returned. + chunk is not in the indexes. Otherwise it will return all + chunk ids that would have the same token (see + prepare_chunk_for_indexes). Note that the chunks whose ids are + returned do not necessarily match the given data; if the + caller cares, they need to verify. 
''' raise NotImplementedError() @@ -1738,7 +1740,22 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover token = self.repo.prepare_chunk_for_indexes('foochunk') self.repo.put_chunk_into_indexes(chunk_id, token, 'fooclient') self.assertEqual( - self.repo.find_chunk_id_by_content('foochunk'), chunk_id) + self.repo.find_chunk_ids_by_content('foochunk'), [chunk_id]) + + def test_finds_all_matching_chunk_ids(self): + self.setup_client() + token = self.repo.prepare_chunk_for_indexes('foochunk') + self.repo.lock_chunk_indexes() + + chunk_id_1 = self.repo.put_chunk_content('foochunk') + self.repo.put_chunk_into_indexes(chunk_id_1, token, 'fooclient') + + chunk_id_2 = self.repo.put_chunk_content('foochunk') + self.repo.put_chunk_into_indexes(chunk_id_2, token, 'fooclient') + + self.assertEqual( + set(self.repo.find_chunk_ids_by_content('foochunk')), + set([chunk_id_1, chunk_id_2])) def test_removes_chunk_from_indexes(self): self.setup_client() @@ -1749,7 +1766,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover self.repo.remove_chunk_from_indexes(chunk_id, 'fooclient') self.assertRaises( obnamlib.RepositoryChunkContentNotInIndexes, - self.repo.find_chunk_id_by_content, 'foochunk') + self.repo.find_chunk_ids_by_content, 'foochunk') def test_putting_chunk_to_indexes_without_locking_them_fails(self): chunk_id = self.repo.put_chunk_content('foochunk') @@ -1779,7 +1796,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover self.repo.unlock_chunk_indexes() self.assertRaises( obnamlib.RepositoryChunkContentNotInIndexes, - self.repo.find_chunk_id_by_content, 'foochunk') + self.repo.find_chunk_ids_by_content, 'foochunk') def test_committing_chunk_indexes_remembers_changes(self): self.setup_client() @@ -1789,7 +1806,7 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover self.repo.put_chunk_into_indexes(chunk_id, token, 'fooclient') self.repo.commit_chunk_indexes() self.assertEqual( - 
self.repo.find_chunk_id_by_content('foochunk'), chunk_id) + self.repo.find_chunk_ids_by_content('foochunk'), [chunk_id]) def test_locking_chunk_indexes_twice_fails(self): self.repo.lock_chunk_indexes() diff --git a/test-gpghome/random_seed b/test-gpghome/random_seed Binary files differ index 273edb2a..e03c7d57 100644 --- a/test-gpghome/random_seed +++ b/test-gpghome/random_seed |