diff options
Diffstat (limited to 'obnamlib/fmt_ga')
-rw-r--r-- | obnamlib/fmt_ga/chunk_store.py | 21 | ||||
-rw-r--r-- | obnamlib/fmt_ga/cowtree.py | 29 | ||||
-rw-r--r-- | obnamlib/fmt_ga/cowtree_tests.py | 29 | ||||
-rw-r--r-- | obnamlib/fmt_ga/indexes.py | 69 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf.py | 6 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf_store.py | 15 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf_store_tests.py | 8 | ||||
-rw-r--r-- | obnamlib/fmt_ga/leaf_tests.py | 8 |
8 files changed, 172 insertions, 13 deletions
diff --git a/obnamlib/fmt_ga/chunk_store.py b/obnamlib/fmt_ga/chunk_store.py index 9f916276..da1ed4bd 100644 --- a/obnamlib/fmt_ga/chunk_store.py +++ b/obnamlib/fmt_ga/chunk_store.py @@ -1,4 +1,4 @@ -# Copyright 2015 Lars Wirzenius +# Copyright 2015,2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -65,6 +65,25 @@ class GAChunkStore(object): filename=None) return content + def get_bag_id(self, chunk_id): + bag_id, _ = obnamlib.parse_object_id(chunk_id) + return bag_id + + def get_chunks_in_bag(self, bag_id): + try: + bag = self._bag_store.get_bag(bag_id) + except EnvironmentError: + pass + else: + for i in range(len(bag)): + yield obnamlib.make_object_id(bag_id, i) + + def remove_bag(self, bag_id): + try: + self._bag_store.remove_bag(bag_id) + except EnvironmentError: + pass + def has_chunk(self, chunk_id): # This is ugly, 'cause it requires reading in the whole bag. # We could easily check if the bag exists, but not whether it diff --git a/obnamlib/fmt_ga/cowtree.py b/obnamlib/fmt_ga/cowtree.py index f1f74385..abfa27b7 100644 --- a/obnamlib/fmt_ga/cowtree.py +++ b/obnamlib/fmt_ga/cowtree.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -69,7 +69,12 @@ class CowTree(object): self._leaf_list.update_leaf(leaf_id, keys[0], keys[-1]) if len(leaf) > self._max_keys_per_leaf: self._leaf_list.drop_leaf(leaf_id) + self._store.remove_leaf(leaf_id) self._split_leaf(leaf) + else: + self._leaf_list.drop_leaf(leaf_id) + self._make_split_leaf(leaf, list(sorted(leaf.keys()))) + self._store.remove_leaf(leaf_id) def _split_leaf(self, leaf): sorted_keys = list(sorted(leaf.keys())) @@ -85,6 +90,24 @@ class CowTree(object): new_id = self._store.put_leaf(new) self._leaf_list.insert_leaf(sorted_keys[0], sorted_keys[-1], new_id) + def remove(self, key): + leaf_id = self._leaf_list.find_leaf_for_key(key) + if leaf_id is not None: + self._leaf_list.drop_leaf(leaf_id) + leaf = self._store.get_leaf(leaf_id) + leaf.remove(key) + self._store.remove_leaf(leaf_id) + if len(leaf) > 0: + self._make_split_leaf(leaf, list(sorted(leaf.keys()))) + + def keys(self): + leaf_ids = list(self._leaf_list.leaf_ids()) + if leaf_ids: + for leaf_id in leaf_ids: + leaf = self._store.get_leaf(leaf_id) + for key in leaf.keys(): + yield key + def commit(self): fake_leaf = obnamlib.CowLeaf() fake_leaf.insert('leaf_list', self._leaf_list.as_dict()) @@ -106,6 +129,10 @@ class _LeafList(object): def from_dict(self, some_dict): self._leaf_list = some_dict + def leaf_ids(self): + for leaf_info in self._leaf_list: + yield leaf_info['id'] + def find_leaf_for_key(self, key): # If there are no leaves, we can't pick one for key. if not self._leaf_list: diff --git a/obnamlib/fmt_ga/cowtree_tests.py b/obnamlib/fmt_ga/cowtree_tests.py index b34ce2f0..1928a607 100644 --- a/obnamlib/fmt_ga/cowtree_tests.py +++ b/obnamlib/fmt_ga/cowtree_tests.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,6 +28,9 @@ class CowTreeTests(unittest.TestCase): self.cow = obnamlib.CowTree() self.cow.set_leaf_store(self.ls) + def test_has_no_keys_initially(self): + self.assertEqual(list(self.cow.keys()), []) + def test_lookup_returns_none_if_key_is_missing(self): self.assertEqual(self.cow.lookup(42), None) @@ -36,6 +39,24 @@ class CowTreeTests(unittest.TestCase): value = 'barvalue' self.cow.insert(key, value) self.assertEqual(self.cow.lookup(key), value) + self.assertEqual(list(self.cow.keys()), [key]) + + def test_removes_only_key_in_blob(self): + key = 'fookey' + value = 'barvalue' + self.cow.insert(key, value) + self.cow.remove(key) + self.assertEqual(list(self.cow.keys()), []) + + def test_removes_one_of_the_keys_in_blob(self): + key = 'fookey' + value = 'barvalue' + key2 = 'fookey2' + value2 = 'barvalue2' + self.cow.insert(key, value) + self.cow.insert(key2, value2) + self.cow.remove(key) + self.assertEqual(list(self.cow.keys()), [key2]) def test_inserts_many_keys(self): N = 10 @@ -64,3 +85,9 @@ class CowTreeTests(unittest.TestCase): cow2.set_leaf_store(self.ls) cow2.set_list_node(list_id) self.assertEqual(cow2.lookup(key), value) + + def test_iterates_over_leaf_keys(self): + key = 'fookey' + value = 'barvalue' + self.cow.insert(key, value) + self.assertEqual(list(self.cow.keys()), [key]) diff --git a/obnamlib/fmt_ga/indexes.py b/obnamlib/fmt_ga/indexes.py index b57b3877..7660b214 100644 --- a/obnamlib/fmt_ga/indexes.py +++ b/obnamlib/fmt_ga/indexes.py @@ -1,4 +1,4 @@ -# Copyright 2015-2016 Lars Wirzenius +# Copyright 2015-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ # =*= License: GPL-3+ =*= +import logging import os import obnamlib @@ -126,6 +127,11 @@ class GAChunkIndexes(object): def put_chunk_into_indexes(self, chunk_id, token, client_id): self._load_data() + client_ids = self._used_by_tree.lookup(chunk_id) + logging.debug( + 'xxx before adding to used-by: chunk %r is used by %r', + chunk_id, client_ids) + self._by_chunk_id_tree.insert(chunk_id, token) chunk_ids = self._by_checksum_tree.lookup(token) @@ -135,12 +141,14 @@ class GAChunkIndexes(object): chunk_ids.append(chunk_id) self._by_checksum_tree.insert(token, chunk_ids) - client_ids = self._used_by_tree.lookup(chunk_id) if client_ids is None: client_ids = [client_id] elif client_id not in client_ids: client_ids.append(client_id) self._used_by_tree.insert(chunk_id, client_ids) + logging.debug( + 'xxx after adding to used-by: chunk %r is used by %r', + chunk_id, client_ids) def find_chunk_ids_by_token(self, token): self._load_data() @@ -164,6 +172,9 @@ class GAChunkIndexes(object): def _remove_used_by(self, chunk_id, client_id): still_used = False client_ids = self._used_by_tree.lookup(chunk_id) + logging.debug( + 'xxx before removing used_by: chunk %r is used by %r', + chunk_id, client_ids) if client_ids is not None and client_id in client_ids: client_ids.remove(client_id) self._used_by_tree.insert(chunk_id, client_ids) @@ -173,13 +184,15 @@ class GAChunkIndexes(object): # We leave an empty list, and use that in # remove_unused_chunks to indicate an unused chunk. pass + logging.debug( + 'xxx after removing used_by: chunk %r is used by %r', + chunk_id, client_ids) return still_used def _remove_chunk_by_id(self, chunk_id): token = self._by_chunk_id_tree.lookup(chunk_id) if token is not None: - # FIXME: Should we have CowTree.delete(key)? - self._by_chunk_id_tree.insert(chunk_id, None) + self._by_chunk_id_tree.remove(chunk_id) return token def _remove_chunk_by_checksum(self, chunk_id, token): @@ -192,8 +205,52 @@ class GAChunkIndexes(object): self._used_by_tree.insert(chunk_id, None) def remove_unused_chunks(self, chunk_store): - # FIXME: This requires having a way to list keys in a CowTree. - pass + unused_chunks = self.get_unused_chunks() + self.drop_unused_chunks(unused_chunks) + + maybe_unused_bags = self.get_bags_containing_chunks( + chunk_store, unused_chunks) + for bag_id in maybe_unused_bags: + chunk_ids = chunk_store.get_chunks_in_bag(bag_id) + logging.debug( + 'remove_unused_chunk: can bag %r be removed?', bag_id) + for chunk_id in chunk_ids: + logging.debug( + 'remove_unused_chunk: chunk %r, used: %r', + chunk_id, self.is_chunk_used_by_anyone(chunk_id)) + if not self.any_chunk_is_used_by_someone(chunk_ids): + logging.debug('remove_unused_chunk: remove bag %r', bag_id) + chunk_store.remove_bag(bag_id) + else: + logging.debug( + 'remove_unused_chunk: bag %r cannot be removed', bag_id) + + def get_unused_chunks(self): + return [ + chunk_id + for chunk_id in self._used_by_tree.keys() + if not self.is_chunk_used_by_anyone(chunk_id) + ] + + def is_chunk_used_by_anyone(self, chunk_id): + return self._used_by_tree.lookup(chunk_id) not in (None, []) + + def drop_unused_chunks(self, chunk_ids): + # By this time we know the chunk is only in the used_by tree. + for chunk_id in chunk_ids: + self._used_by_tree.remove(chunk_id) + + def get_bags_containing_chunks(self, chunk_store, chunk_ids): + return set( + chunk_store.get_bag_id(chunk_id) + for chunk_id in chunk_ids + ) + + def any_chunk_is_used_by_someone(self, chunk_ids): + return any( + self.is_chunk_used_by_anyone(chunk_id) + for chunk_id in chunk_ids + ) def validate_chunk_content(self, chunk_id): return None diff --git a/obnamlib/fmt_ga/leaf.py b/obnamlib/fmt_ga/leaf.py index 6522a6a9..7f293cc3 100644 --- a/obnamlib/fmt_ga/leaf.py +++ b/obnamlib/fmt_ga/leaf.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,6 +36,10 @@ class CowLeaf(object): def insert(self, key, value): self._dict[key] = value + def remove(self, key): + if key in self._dict: + del self._dict[key] + def as_dict(self): return copy.deepcopy(self._dict) diff --git a/obnamlib/fmt_ga/leaf_store.py b/obnamlib/fmt_ga/leaf_store.py index 5d488e96..56b08faa 100644 --- a/obnamlib/fmt_ga/leaf_store.py +++ b/obnamlib/fmt_ga/leaf_store.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -27,6 +27,9 @@ class LeafStoreInterface(object): # pragma: no cover def get_leaf(self, leaf_id): raise NotImplementedError() + def remove_leaf(self, leaf_id): + raise NotImplementedError() + def flush(self): raise NotImplementedError() @@ -45,6 +48,10 @@ class InMemoryLeafStore(LeafStoreInterface): def get_leaf(self, leaf_id): return self._leaves.get(leaf_id, None) + def remove_leaf(self, leaf_id): + if leaf_id in self._leaves: + del self._leaves[leaf_id] + def flush(self): pass @@ -65,5 +72,11 @@ class LeafStore(LeafStoreInterface): # pragma: no cover leaf.from_dict(self._blob_store.get_blob(leaf_id)) return leaf + def remove_leaf(self, leaf_id): + # FIXME: This is a bit ugly, since we need to break the + # bag/blob store abstraction. + bag_id, _ = obnamlib.parse_object_id(leaf_id) + self._blob_store._bag_store.remove_bag(bag_id) + def flush(self): self._blob_store.flush() diff --git a/obnamlib/fmt_ga/leaf_store_tests.py b/obnamlib/fmt_ga/leaf_store_tests.py index 9d1a8f71..049d03e4 100644 --- a/obnamlib/fmt_ga/leaf_store_tests.py +++ b/obnamlib/fmt_ga/leaf_store_tests.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -40,6 +40,12 @@ class LeafStoreTests(object): def test_returns_None_if_leaf_is_missing(self): self.assertEqual(self.ls.get_leaf(42), None) + def test_removes_leaf(self): + leaf = {'foo': 'bar'} + leaf_id = self.ls.put_leaf(leaf) + self.ls.remove_leaf(leaf_id) + self.assertEqual(self.ls.get_leaf(leaf_id), None) + def test_has_flush(self): self.assertEqual(self.ls.flush(), None) diff --git a/obnamlib/fmt_ga/leaf_tests.py b/obnamlib/fmt_ga/leaf_tests.py index 6c5f2903..30324dd2 100644 --- a/obnamlib/fmt_ga/leaf_tests.py +++ b/obnamlib/fmt_ga/leaf_tests.py @@ -1,4 +1,4 @@ -# Copyright 2016 Lars Wirzenius +# Copyright 2016-2017 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -52,6 +52,12 @@ class CowLeafTests(unittest.TestCase): leaf.insert('foo', 'bar') self.assertEqual(leaf.keys(), ['foo']) + def test_removes_key(self): + leaf = obnamlib.CowLeaf() + leaf.insert('foo', 'bar') + leaf.remove('foo') + self.assertEqual(leaf.keys(), []) + def test_dict_round_trip(self): leaf = obnamlib.CowLeaf() leaf.insert('foo', 'bar') |