diff options
author | Lars Wirzenius <liw@liw.fi> | 2017-02-12 18:20:12 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2017-02-12 18:20:12 +0200 |
commit | fe6b8f95d41c8a9530f6896688facaf4a6c71c25 (patch) | |
tree | 08db5a65891f78c48717aa3a8f525705e1e94a8f | |
parent | 5d966ccf3769b7a6da459228ddc529068ba579ff (diff) | |
download | obnam-fe6b8f95d41c8a9530f6896688facaf4a6c71c25.tar.gz |
Change how uniquely-in-remove-gen computation is done
Rather than computing a large union of all chunk ids in all
generations that will remain, we iterate over the chunk ids and
remove them from the set of chunk ids in the generation that is
getting removed. This should cut down on memory use.
-rw-r--r-- | obnamlib/fmt_ga/client.py | 30 |
1 files changed, 13 insertions, 17 deletions
diff --git a/obnamlib/fmt_ga/client.py b/obnamlib/fmt_ga/client.py index 013a26aa..5623223e 100644 --- a/obnamlib/fmt_ga/client.py +++ b/obnamlib/fmt_ga/client.py @@ -230,28 +230,17 @@ class GAClient(object): }) self._dumper = dumper - chunks_in_removed = self.get_generation_chunk_ids(gen_number) + chunks_to_remove = self.get_generation_chunk_ids(gen_number) dumper.dump_memory_profile('after getting chunks in removed gen') - chunks_remaining = self._get_chunk_ids_used_by_generations(remaining) - dumper.dump_memory_profile( - 'after getting chunks in remaining generations') - - unused_chunks = set(chunks_in_removed).difference(chunks_remaining) - dumper.dump_memory_profile( - 'after getting computing set of chunks to remove') + for chunk_id in self._generate_chunk_ids_in_generations(remaining): + if chunk_id in chunks_to_remove: + chunks_to_remove.remove(chunk_id) + dumper.dump_memory_profile('after computing chunk uniq to removed gen') self._generations.set_generations(remaining) - return list(unused_chunks) - - def _get_chunk_ids_used_by_generations(self, generations): - chunk_ids = set() - for generation in generations: - gen_number = generation.get_number() - chunk_ids = chunk_ids.union( - set(self.get_generation_chunk_ids(gen_number))) - return chunk_ids + return list(chunks_to_remove) def get_generation_key(self, gen_number, key): self._load_data() @@ -419,6 +408,13 @@ class GAClient(object): return result + def _generate_chunk_ids_in_generations(self, generations): + for generation in generations: + gen_number = generation.get_number() + chunk_ids = self.get_generation_chunk_ids(gen_number) + for chunk_id in chunk_ids: + yield chunk_id + def get_file_children(self, gen_number, filename): self._load_data() generation = self._lookup_generation_by_gen_number(gen_number) |