summaryrefslogtreecommitdiff
path: root/obnamlib/plugins/forget_plugin.py
blob: d5c4453059fd905a8ba83315a367b0affa54ca79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (C) 2010-2017  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import datetime
import logging

import obnamlib


class ForgetPlugin(obnamlib.ObnamPlugin):

    '''Forget generations.'''

    # The plugin removes backup generations of the configured client,
    # either those named on the command line or those not matched by
    # the ``keep`` policy setting, and then removes chunks that are no
    # longer used.

    def enable(self):
        '''Register the ``forget`` subcommand and the ``keep`` setting.'''
        self.app.add_subcommand(
            'forget', self.forget, arg_synopsis='[GENERATION]...')
        self.app.settings.string(
            ['keep'],
            'policy for what generations to keep '
            'when forgetting')

    def forget(self, args):
        '''Forget (remove) specified backup generations.'''
        # NOTE(review): the docstring above is presumably shown as the
        # subcommand's help text, so it is kept short and unchanged;
        # details live in comments instead.
        #
        # ``args`` is the list of generation specs from the command
        # line. If it is empty, the ``keep`` setting decides what to
        # remove. If both are empty, nothing is done.
        self.app.settings.require('repository')
        self.app.settings.require('client-name')

        if not args and not self.app.settings['keep']:
            logging.info('forget: Nothing to forget, not doing anything')
            return

        self.setup_progress_reporting()

        self.repo = self.app.get_repository_object()

        # We lock everything. This is to avoid a race condition
        # between different clients doing backup and forget at the
        # same time. If we only lock the client we care about, plus
        # the chunk indexes, the following scenario is possible:
        #
        #       1. Client A locks itself, plus chunk indexes, and
        #          starts running forget, but slowly.
        #       2. Client B locks itself, and starts running a backup.
        #          It merrily uses chunks that A thinks are only used
        #          by A itself, since B hasn't updated the chunk
        #          indexes, and can't do that before A is done.
        #       3. Client A finishes doing forget, and removes a number
        #          of chunks, because nobody else was marked as using
        #          them. However, some of these chunks are now being
        #          used by B.
        #       4. A commits its changes.
        #       5. B gains lock to chunk indexes, and commits its changes.
        #
        # At this point, the chunk indexes indicate that B uses some chunks,
        # but A already removed the chunks.
        #
        # By locking all clients during a forget, we prevent this race
        # condition: nobody else can be running a backup while anyone is
        # running a forget. We also lock the client list to prevent a new
        # client from being added.
        #
        # This is not a great solution, as it means that during a
        # forget (which currently can be quite slow) nobody can do a
        # backup. However, correctness trumps speed.

        self.repo.lock_everything()
        try:
            self._forget_generations(args)
        except BaseException:
            # Do not leave the whole repository locked behind us when
            # the operation fails or is interrupted; then let the
            # original error propagate unchanged.
            self._unlock_after_error()
            raise

        self.repo.unlock_everything()
        self.app.dump_memory_profile('after committing')

        self.repo.close()
        self.app.ts.finish()

    def _forget_generations(self, args):
        '''Remove the chosen generations and commit the result.

        Must be called with the repository locked. ``args`` is the
        list of generation specs given by the user; when it is empty
        the ``keep`` policy setting selects the generations to remove.

        '''

        client_name = self.app.settings['client-name']
        if args:
            removeids = self.get_genids_to_remove_from_args(client_name, args)
        else:
            # forget() returns early unless args or a keep policy was
            # given, so an empty args list means a policy is set.
            genlist = self.get_all_generations(client_name)
            removeids = self.choose_genids_to_remove_using_keep_policy(genlist)

        self.app.ts['gens'] = removeids
        for genid in removeids:
            self.app.ts['gen'] = genid
            chunk_ids = self.remove(genid)
            for unused_chunk_id in chunk_ids:
                self.repo.remove_chunk_from_indexes(
                    unused_chunk_id, client_name)
            # Commit after every generation, so that work already done
            # is saved before the next removal starts.
            self.repo.commit_client(client_name)
            self.repo.commit_chunk_indexes()
            self.repo.remove_unused_chunks()
            self.app.dump_memory_profile(
                'after removing %s' %
                self.repo.make_generation_spec(genid))

        # Final commit; this also covers the case of nothing removed.
        self.repo.commit_client(client_name)
        self.repo.commit_chunk_indexes()
        self.repo.remove_unused_chunks()

    def _unlock_after_error(self):
        '''Release all repository locks after a failure, best effort.

        A failure to unlock is logged rather than raised, so that it
        does not hide the error that made forget fail.

        '''

        try:
            self.repo.unlock_everything()
        except Exception as e:
            logging.warning(
                'forget: error while unlocking after an error: %s', str(e))

    def setup_progress_reporting(self):
        '''Initialise the progress fields and format used by forget.'''
        self.app.ts['gen'] = None
        self.app.ts['gens'] = []
        self.app.ts.format('forgetting generations: %Index(gen,gens) done')

    def get_genids_to_remove_from_args(self, client_name, args):
        '''Return generation ids for the generation specs in ``args``.

        Each spec is interpreted by the repository for ``client_name``;
        the result has one id per spec, in the same order.

        '''

        return [
            self.repo.interpret_generation_spec(client_name, genspec)
            for genspec in args]

    def get_all_generations(self, client_name):
        '''Return (generation id, end time) pairs for ``client_name``.

        The end time is the generation's ``REPO_GENERATION_ENDED`` key
        converted from a timestamp to a naive, local-time
        ``datetime.datetime``.

        '''

        genlist = []
        for genid in self.repo.get_client_generation_ids(client_name):
            end = self.repo.get_generation_key(
                genid, obnamlib.REPO_GENERATION_ENDED)
            genlist.append((genid, datetime.datetime.fromtimestamp(end)))
        return genlist

    def choose_genids_to_remove_using_keep_policy(self, genlist):
        '''Return ids of generations that the ``keep`` policy drops.

        ``genlist`` is a list of (generation id, datetime) pairs. The
        result keeps the order of ``genlist`` and contains every id
        that the policy did not select for keeping.

        '''

        fp = obnamlib.ForgetPolicy()
        rules = fp.parse(self.app.settings['keep'])
        keeplist = fp.match(rules, genlist)
        keepids = {genid for genid, _ in keeplist}
        return [genid for genid, _ in genlist if genid not in keepids]

    def remove(self, genid):
        '''Remove generation ``genid``, unless ``pretend`` is set.

        Returns whatever the repository's ``remove_generation``
        returns, which the caller treats as the ids of chunks to drop
        from the chunk indexes. In pretend mode nothing is removed, a
        notification is shown and an empty list is returned.

        '''

        if self.app.settings['pretend']:
            self.app.ts.notify(
                'Pretending to remove generation %s' %
                self.repo.make_generation_spec(genid))
            return []
        return self.repo.remove_generation(genid)