summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--obnamlib/backup_progress.py7
-rw-r--r--obnamlib/backup_progress_tests.py2
-rw-r--r--obnamlib/fmt_ga/chunk_store.py21
-rw-r--r--obnamlib/fmt_ga/cowtree.py29
-rw-r--r--obnamlib/fmt_ga/cowtree_tests.py29
-rw-r--r--obnamlib/fmt_ga/indexes.py69
-rw-r--r--obnamlib/fmt_ga/leaf.py6
-rw-r--r--obnamlib/fmt_ga/leaf_store.py15
-rw-r--r--obnamlib/fmt_ga/leaf_store_tests.py8
-rw-r--r--obnamlib/fmt_ga/leaf_tests.py8
-rw-r--r--obnamlib/plugins/forget_plugin.py1
-rw-r--r--pylint.conf1
-rwxr-xr-xtest-ga-forget34
13 files changed, 212 insertions, 18 deletions
diff --git a/obnamlib/backup_progress.py b/obnamlib/backup_progress.py
index 026dd604..4927c49f 100644
--- a/obnamlib/backup_progress.py
+++ b/obnamlib/backup_progress.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2009-2015 Lars Wirzenius
+# Copyright (C) 2009-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -116,7 +116,7 @@ class BackupProgress(object):
def report_stats(self, output, fs, quiet, report=None): # pragma: no cover
if report is None:
report = self.compute_report(fs)
-
+
duration_string = obnamlib.humanise_duration(report['duration'])
chunk_amount, chunk_unit = obnamlib.humanise_size(
@@ -134,7 +134,8 @@ class BackupProgress(object):
overhead_bytes = max(0, overhead_bytes)
overhead_amount, overhead_unit = obnamlib.humanise_size(overhead_bytes)
if report['uploaded-total-bytes'] > 0:
- overhead_percent = 100.0 * overhead_bytes / report['uploaded-total-bytes']
+ overhead_percent = (
+ 100.0 * overhead_bytes / report['uploaded-total-bytes'])
else:
overhead_percent = 0.0
diff --git a/obnamlib/backup_progress_tests.py b/obnamlib/backup_progress_tests.py
index c3d2dc01..7aba84b1 100644
--- a/obnamlib/backup_progress_tests.py
+++ b/obnamlib/backup_progress_tests.py
@@ -103,7 +103,7 @@ class DummyTerminalStatus(object):
def error(self, msg):
pass
-
+
def __setitem__(self, key, value):
pass
diff --git a/obnamlib/fmt_ga/chunk_store.py b/obnamlib/fmt_ga/chunk_store.py
index 9f916276..da1ed4bd 100644
--- a/obnamlib/fmt_ga/chunk_store.py
+++ b/obnamlib/fmt_ga/chunk_store.py
@@ -1,4 +1,4 @@
-# Copyright 2015 Lars Wirzenius
+# Copyright 2015,2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -65,6 +65,25 @@ class GAChunkStore(object):
filename=None)
return content
+ def get_bag_id(self, chunk_id):
+ bag_id, _ = obnamlib.parse_object_id(chunk_id)
+ return bag_id
+
+ def get_chunks_in_bag(self, bag_id):
+ try:
+ bag = self._bag_store.get_bag(bag_id)
+ except EnvironmentError:
+ pass
+ else:
+ for i in range(len(bag)):
+ yield obnamlib.make_object_id(bag_id, i)
+
+ def remove_bag(self, bag_id):
+ try:
+ self._bag_store.remove_bag(bag_id)
+ except EnvironmentError:
+ pass
+
def has_chunk(self, chunk_id):
# This is ugly, 'cause it requires reading in the whole bag.
# We could easily check if the bag exists, but not whether it
diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py
index f1f74385..abfa27b7 100644
--- a/obnamlib/fmt_ga/cowtree.py
+++ b/obnamlib/fmt_ga/cowtree.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -69,7 +69,12 @@ class CowTree(object):
self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1])
if len(leaf) > self._max_keys_per_leaf:
self._leaf_list.drop_leaf(leaf_id)
+ self._store.remove_leaf(leaf_id)
self._split_leaf(leaf)
+ else:
+ self._leaf_list.drop_leaf(leaf_id)
+ self._make_split_leaf(leaf, list(sorted(leaf.keys())))
+ self._store.remove_leaf(leaf_id)
def _split_leaf(self, leaf):
sorted_keys = list(sorted(leaf.keys()))
@@ -85,6 +90,24 @@ class CowTree(object):
new_id = self._store.put_leaf(new)
self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id)
+ def remove(self, key):
+ leaf_id = self._leaf_list.find_leaf_for_key(key)
+ if leaf_id is not None:
+ self._leaf_list.drop_leaf(leaf_id)
+ leaf = self._store.get_leaf(leaf_id)
+ leaf.remove(key)
+ self._store.remove_leaf(leaf_id)
+ if len(leaf) > 0:
+ self._make_split_leaf(leaf, list(sorted(leaf.keys())))
+
+ def keys(self):
+ leaf_ids = list(self._leaf_list.leaf_ids())
+ if leaf_ids:
+ for leaf_id in leaf_ids:
+ leaf = self._store.get_leaf(leaf_id)
+ for key in leaf.keys():
+ yield key
+
def commit(self):
fake_leaf = obnamlib.CowLeaf()
fake_leaf.insert('leaf_list', self._leaf_list.as_dict())
@@ -106,6 +129,10 @@ class _LeafList(object):
def from_dict(self, some_dict):
self._leaf_list = some_dict
+ def leaf_ids(self):
+ for leaf_info in self._leaf_list:
+ yield leaf_info['id']
+
def find_leaf_for_key(self, key):
# If there are no leaves, we can't pick one for key.
if not self._leaf_list:
diff --git a/obnamlib/fmt_ga/cowtree_tests.py b/obnamlib/fmt_ga/cowtree_tests.py
index b34ce2f0..1928a607 100644
--- a/obnamlib/fmt_ga/cowtree_tests.py
+++ b/obnamlib/fmt_ga/cowtree_tests.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -28,6 +28,9 @@ class CowTreeTests(unittest.TestCase):
self.cow = obnamlib.CowTree()
self.cow.set_leaf_store(self.ls)
+ def test_has_no_keys_initially(self):
+ self.assertEqual(list(self.cow.keys()), [])
+
def test_lookup_returns_none_if_key_is_missing(self):
self.assertEqual(self.cow.lookup(42), None)
@@ -36,6 +39,24 @@ class CowTreeTests(unittest.TestCase):
value = 'barvalue'
self.cow.insert(key, value)
self.assertEqual(self.cow.lookup(key), value)
+ self.assertEqual(list(self.cow.keys()), [key])
+
+ def test_removes_only_key_in_blob(self):
+ key = 'fookey'
+ value = 'barvalue'
+ self.cow.insert(key, value)
+ self.cow.remove(key)
+ self.assertEqual(list(self.cow.keys()), [])
+
+ def test_removes_one_of_the_keys_in_blob(self):
+ key = 'fookey'
+ value = 'barvalue'
+ key2 = 'fookey2'
+ value2 = 'barvalue2'
+ self.cow.insert(key, value)
+ self.cow.insert(key2, value2)
+ self.cow.remove(key)
+ self.assertEqual(list(self.cow.keys()), [key2])
def test_inserts_many_keys(self):
N = 10
@@ -64,3 +85,9 @@ class CowTreeTests(unittest.TestCase):
cow2.set_leaf_store(self.ls)
cow2.set_list_node(list_id)
self.assertEqual(cow2.lookup(key), value)
+
+ def test_iterates_over_leaf_keys(self):
+ key = 'fookey'
+ value = 'barvalue'
+ self.cow.insert(key, value)
+ self.assertEqual(list(self.cow.keys()), [key])
diff --git a/obnamlib/fmt_ga/indexes.py b/obnamlib/fmt_ga/indexes.py
index b57b3877..7660b214 100644
--- a/obnamlib/fmt_ga/indexes.py
+++ b/obnamlib/fmt_ga/indexes.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2016 Lars Wirzenius
+# Copyright 2015-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
# =*= License: GPL-3+ =*=
+import logging
import os
import obnamlib
@@ -126,6 +127,11 @@ class GAChunkIndexes(object):
def put_chunk_into_indexes(self, chunk_id, token, client_id):
self._load_data()
+ client_ids = self._used_by_tree.lookup(chunk_id)
+ logging.debug(
+ 'xxx before adding to used-by: chunk %r is used by %r',
+ chunk_id, client_ids)
+
self._by_chunk_id_tree.insert(chunk_id, token)
chunk_ids = self._by_checksum_tree.lookup(token)
@@ -135,12 +141,14 @@ class GAChunkIndexes(object):
chunk_ids.append(chunk_id)
self._by_checksum_tree.insert(token, chunk_ids)
- client_ids = self._used_by_tree.lookup(chunk_id)
if client_ids is None:
client_ids = [client_id]
elif client_id not in client_ids:
client_ids.append(client_id)
self._used_by_tree.insert(chunk_id, client_ids)
+ logging.debug(
+ 'xxx after adding to used-by: chunk %r is used by %r',
+ chunk_id, client_ids)
def find_chunk_ids_by_token(self, token):
self._load_data()
@@ -164,6 +172,9 @@ class GAChunkIndexes(object):
def _remove_used_by(self, chunk_id, client_id):
still_used = False
client_ids = self._used_by_tree.lookup(chunk_id)
+ logging.debug(
+ 'xxx before removing used_by: chunk %r is used by %r',
+ chunk_id, client_ids)
if client_ids is not None and client_id in client_ids:
client_ids.remove(client_id)
self._used_by_tree.insert(chunk_id, client_ids)
@@ -173,13 +184,15 @@ class GAChunkIndexes(object):
# We leave an empty list, and use that in
# remove_unused_chunks to indicate an unused chunk.
pass
+ logging.debug(
+ 'xxx after removing used_by: chunk %r is used by %r',
+ chunk_id, client_ids)
return still_used
def _remove_chunk_by_id(self, chunk_id):
token = self._by_chunk_id_tree.lookup(chunk_id)
if token is not None:
- # FIXME: Should we have CowTree.delete(key)?
- self._by_chunk_id_tree.insert(chunk_id, None)
+ self._by_chunk_id_tree.remove(chunk_id)
return token
def _remove_chunk_by_checksum(self, chunk_id, token):
@@ -192,8 +205,52 @@ class GAChunkIndexes(object):
self._used_by_tree.insert(chunk_id, None)
def remove_unused_chunks(self, chunk_store):
- # FIXME: This requires having a way to list keys in a CowTree.
- pass
+ unused_chunks = self.get_unused_chunks()
+ self.drop_unused_chunks(unused_chunks)
+
+ maybe_unused_bags = self.get_bags_containing_chunks(
+ chunk_store, unused_chunks)
+ for bag_id in maybe_unused_bags:
+ chunk_ids = chunk_store.get_chunks_in_bag(bag_id)
+ logging.debug(
+ 'remove_unused_chunk: can bag %r be removed?', bag_id)
+ for chunk_id in chunk_ids:
+ logging.debug(
+ 'remove_unused_chunk: chunk %r, used: %r',
+ chunk_id, self.is_chunk_used_by_anyone(chunk_id))
+ if not self.any_chunk_is_used_by_someone(chunk_ids):
+ logging.debug('remove_unused_chunk: remove bag %r', bag_id)
+ chunk_store.remove_bag(bag_id)
+ else:
+ logging.debug(
+ 'remove_unused_chunk: bag %r cannot be removed', bag_id)
+
+ def get_unused_chunks(self):
+ return [
+ chunk_id
+ for chunk_id in self._used_by_tree.keys()
+ if not self.is_chunk_used_by_anyone(chunk_id)
+ ]
+
+ def is_chunk_used_by_anyone(self, chunk_id):
+ return self._used_by_tree.lookup(chunk_id) not in (None, [])
+
+ def drop_unused_chunks(self, chunk_ids):
+ # By this time we know the chunk is only in the used_by tree.
+ for chunk_id in chunk_ids:
+ self._used_by_tree.remove(chunk_id)
+
+ def get_bags_containing_chunks(self, chunk_store, chunk_ids):
+ return set(
+ chunk_store.get_bag_id(chunk_id)
+ for chunk_id in chunk_ids
+ )
+
+ def any_chunk_is_used_by_someone(self, chunk_ids):
+ return any(
+ self.is_chunk_used_by_anyone(chunk_id)
+ for chunk_id in chunk_ids
+ )
def validate_chunk_content(self, chunk_id):
return None
diff --git a/obnamlib/fmt_ga/leaf.py b/obnamlib/fmt_ga/leaf.py
index 6522a6a9..7f293cc3 100644
--- a/obnamlib/fmt_ga/leaf.py
+++ b/obnamlib/fmt_ga/leaf.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -36,6 +36,10 @@ class CowLeaf(object):
def insert(self, key, value):
self._dict[key] = value
+ def remove(self, key):
+ if key in self._dict:
+ del self._dict[key]
+
def as_dict(self):
return copy.deepcopy(self._dict)
diff --git a/obnamlib/fmt_ga/leaf_store.py b/obnamlib/fmt_ga/leaf_store.py
index 5d488e96..56b08faa 100644
--- a/obnamlib/fmt_ga/leaf_store.py
+++ b/obnamlib/fmt_ga/leaf_store.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,6 +27,9 @@ class LeafStoreInterface(object): # pragma: no cover
def get_leaf(self, leaf_id):
raise NotImplementedError()
+ def remove_leaf(self, leaf_id):
+ raise NotImplementedError()
+
def flush(self):
raise NotImplementedError()
@@ -45,6 +48,10 @@ class InMemoryLeafStore(LeafStoreInterface):
def get_leaf(self, leaf_id):
return self._leaves.get(leaf_id, None)
+ def remove_leaf(self, leaf_id):
+ if leaf_id in self._leaves:
+ del self._leaves[leaf_id]
+
def flush(self):
pass
@@ -65,5 +72,11 @@ class LeafStore(LeafStoreInterface): # pragma: no cover
leaf.from_dict(self._blob_store.get_blob(leaf_id))
return leaf
+ def remove_leaf(self, leaf_id):
+ # FIXME: This is a bit ugly, since we need to break the
+ # bag/blob store abstraction.
+ bag_id, _ = obnamlib.parse_object_id(leaf_id)
+ self._blob_store._bag_store.remove_bag(bag_id)
+
def flush(self):
self._blob_store.flush()
diff --git a/obnamlib/fmt_ga/leaf_store_tests.py b/obnamlib/fmt_ga/leaf_store_tests.py
index 9d1a8f71..049d03e4 100644
--- a/obnamlib/fmt_ga/leaf_store_tests.py
+++ b/obnamlib/fmt_ga/leaf_store_tests.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -40,6 +40,12 @@ class LeafStoreTests(object):
def test_returns_None_if_leaf_is_missing(self):
self.assertEqual(self.ls.get_leaf(42), None)
+ def test_removes_leaf(self):
+ leaf = {'foo': 'bar'}
+ leaf_id = self.ls.put_leaf(leaf)
+ self.ls.remove_leaf(leaf_id)
+ self.assertEqual(self.ls.get_leaf(leaf_id), None)
+
def test_has_flush(self):
self.assertEqual(self.ls.flush(), None)
diff --git a/obnamlib/fmt_ga/leaf_tests.py b/obnamlib/fmt_ga/leaf_tests.py
index 6c5f2903..30324dd2 100644
--- a/obnamlib/fmt_ga/leaf_tests.py
+++ b/obnamlib/fmt_ga/leaf_tests.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Lars Wirzenius
+# Copyright 2016-2017 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -52,6 +52,12 @@ class CowLeafTests(unittest.TestCase):
leaf.insert('foo', 'bar')
self.assertEqual(leaf.keys(), ['foo'])
+ def test_removes_key(self):
+ leaf = obnamlib.CowLeaf()
+ leaf.insert('foo', 'bar')
+ leaf.remove('foo')
+ self.assertEqual(leaf.keys(), [])
+
def test_dict_round_trip(self):
leaf = obnamlib.CowLeaf()
leaf.insert('foo', 'bar')
diff --git a/obnamlib/plugins/forget_plugin.py b/obnamlib/plugins/forget_plugin.py
index d5c44530..dea759ba 100644
--- a/obnamlib/plugins/forget_plugin.py
+++ b/obnamlib/plugins/forget_plugin.py
@@ -103,7 +103,6 @@ class ForgetPlugin(obnamlib.ObnamPlugin):
# Commit or unlock everything.
self.repo.commit_client(client_name)
self.repo.commit_chunk_indexes()
- self.repo.remove_unused_chunks()
self.repo.unlock_everything()
self.app.dump_memory_profile('after committing')
diff --git a/pylint.conf b/pylint.conf
index 146a84ee..e21137e9 100644
--- a/pylint.conf
+++ b/pylint.conf
@@ -15,6 +15,7 @@ disable=
missing-docstring,
no-self-use,
protected-access,
+ redefined-builtin,
redefined-variable-type,
star-args,
too-few-public-methods,
diff --git a/test-ga-forget b/test-ga-forget
new file mode 100755
index 00000000..9bee8531
--- /dev/null
+++ b/test-ga-forget
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright 2017 Lars Wirzenius
+
+set -eu
+
+obnam()
+{
+ env | grep OBNAM_PROFILE
+ ./obnam --no-default-config \
+ --repository t.repo \
+ --repository-format green-albatross-20160813 \
+ --root t.data \
+ --log t.log --log-level debug \
+ "$@"
+}
+
+rm -rf t.data t.repo t.log t.*.prof
+genbackupdata --create 100M t.data
+OBNAM_PROFILE=t.backup.prof obnam backup
+genid="$(obnam genids)"
+OBNAM_PROFILE=t.forget.prof obnam forget "$genid"
+
+echo
+size="$(du -sm t.repo | awk '{print $1}')"
+echo "Repository size: $size"
+echo -n "Generations: "
+obnam genids | tr '\n' ' '
+echo
+
+if [ "$size" -gt 1 ]
+then
+ echo "FORGET DIDN'T REMOVE DATA" 1>&2
+ exit 1
+fi