summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2015-03-22 13:58:21 +0200
committerLars Wirzenius <liw@liw.fi>2015-03-22 13:58:21 +0200
commit0cba6730e210fae49bd296a4498b111e88d550d2 (patch)
tree93004d5d0014e16666f4c147a19d5549caca76d0
parent70709b5039a6db6bef1787e048b43140a135e714 (diff)
parentb2a7f4e2f0cedb6c1925070f2e7e1dc8511def27 (diff)
downloadobnam-0cba6730e210fae49bd296a4498b111e88d550d2.tar.gz
Fix race condition between backup and forget wrt chunk deletion
-rw-r--r--obnamlib/plugins/forget_plugin.py45
1 files changed, 43 insertions, 2 deletions
diff --git a/obnamlib/plugins/forget_plugin.py b/obnamlib/plugins/forget_plugin.py
index 534381fc..6f49a138 100644
--- a/obnamlib/plugins/forget_plugin.py
+++ b/obnamlib/plugins/forget_plugin.py
@@ -41,7 +41,41 @@ class ForgetPlugin(obnamlib.ObnamPlugin):
self.app.ts.format('forgetting generations: %Index(gen,gens) done')
self.repo = self.app.get_repository_object()
- self.repo.lock_client(self.app.settings['client-name'])
+
+ # We lock everything. This is to avoid a race condition
+ # between different clients doing backup and forget at the
+ # same time. If we only lock the client we care about, plus
+ # the chunk indexes, the following scenario is possible:
+ #
+ # 1. Client A locks itself, plus chunk indexes, and
+ # starts running forget, but slowly.
+ # 2. Client B locks itself, and starts running a backup.
+ # It merrily uses chunks that A thinks are only used
+ # but A itself, since B hasn't updated the chunk
+ # indexes, and can't do that before A is done.
+ # 3. Client A finishes doing forget, and removes a number
+ # of chunks, because nobody else was marked as using
+ # them. However, some of these chunks are now being
+ # used by B.
+ # 4. A commits its changes.
+ # 5. B gains lock to chunk indexes, and commits its changes.
+ #
+ # At this point, the chunk indexes indicate that B uses some chunks,
+ # but A already removed the chunks.
+ #
+ # By locking all clients during a forget, we prevent this race
+ # condition: nobody else can be running a backup while anyone is
+ # running a forget. We also lock the client list to prevent a new
+ # client from being added.
+ #
+ # This is not a great solution, as it means that during a
+ # forget (which currently can be quite slow) nobody can do a
+ # backup. However, correctness trumps speed.
+
+ self.repo.lock_client_list()
+ client_names = self.repo.get_client_names()
+ for some_client_name in client_names:
+ self.repo.lock_client(some_client_name)
self.repo.lock_chunk_indexes()
self.app.dump_memory_profile('at beginning')
@@ -85,9 +119,16 @@ class ForgetPlugin(obnamlib.ObnamPlugin):
'after removing %s' %
self.repo.make_generation_spec(genid))
- self.repo.commit_client(client_name)
+ # Commit or unlock everything.
+ self.repo.unlock_client_list()
+ for some_client_name in client_names:
+ if some_client_name == client_name:
+ self.repo.commit_client(some_client_name)
+ else:
+ self.repo.unlock_client(some_client_name)
self.repo.commit_chunk_indexes()
self.app.dump_memory_profile('after committing')
+
self.repo.close()
self.app.ts.finish()