summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2017-02-12 18:20:12 +0200
committerLars Wirzenius <liw@liw.fi>2017-02-12 18:20:12 +0200
commitfe6b8f95d41c8a9530f6896688facaf4a6c71c25 (patch)
tree08db5a65891f78c48717aa3a8f525705e1e94a8f
parent5d966ccf3769b7a6da459228ddc529068ba579ff (diff)
downloadobnam-fe6b8f95d41c8a9530f6896688facaf4a6c71c25.tar.gz
Change how uniquely-in-remove-gen computation is done
Rather than computing a large union of all chunk ids in all generations that will remain, we iterate over those chunk ids and remove them from the set of chunk ids in the generation that is getting removed. This should cut down on memory use.
-rw-r--r--obnamlib/fmt_ga/client.py30
1 files changed, 13 insertions, 17 deletions
diff --git a/obnamlib/fmt_ga/client.py b/obnamlib/fmt_ga/client.py
index 013a26aa..5623223e 100644
--- a/obnamlib/fmt_ga/client.py
+++ b/obnamlib/fmt_ga/client.py
@@ -230,28 +230,17 @@ class GAClient(object):
})
self._dumper = dumper
- chunks_in_removed = self.get_generation_chunk_ids(gen_number)
+ chunks_to_remove = self.get_generation_chunk_ids(gen_number)
dumper.dump_memory_profile('after getting chunks in removed gen')
- chunks_remaining = self._get_chunk_ids_used_by_generations(remaining)
- dumper.dump_memory_profile(
- 'after getting chunks in remaining generations')
-
- unused_chunks = set(chunks_in_removed).difference(chunks_remaining)
- dumper.dump_memory_profile(
- 'after getting computing set of chunks to remove')
+ for chunk_id in self._generate_chunk_ids_in_generations(remaining):
+ if chunk_id in chunks_to_remove:
+ chunks_to_remove.remove(chunk_id)
+ dumper.dump_memory_profile('after computing chunk uniq to removed gen')
self._generations.set_generations(remaining)
- return list(unused_chunks)
-
- def _get_chunk_ids_used_by_generations(self, generations):
- chunk_ids = set()
- for generation in generations:
- gen_number = generation.get_number()
- chunk_ids = chunk_ids.union(
- set(self.get_generation_chunk_ids(gen_number)))
- return chunk_ids
+ return list(chunks_to_remove)
def get_generation_key(self, gen_number, key):
self._load_data()
@@ -419,6 +408,13 @@ class GAClient(object):
return result
+ def _generate_chunk_ids_in_generations(self, generations):
+ for generation in generations:
+ gen_number = generation.get_number()
+ chunk_ids = self.get_generation_chunk_ids(gen_number)
+ for chunk_id in chunk_ids:
+ yield chunk_id
+
def get_file_children(self, gen_number, filename):
self._load_data()
generation = self._lookup_generation_by_gen_number(gen_number)