diff options
author | Lars Wirzenius <liw@liw.fi> | 2017-02-12 18:44:01 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2017-02-12 18:44:01 +0200 |
commit | 2c734daf6b4a252b2ba0adffa08fef7e1c23830e (patch) | |
tree | 0c471e05e59e99f80f5faa045c2db61e52b819c2 | |
parent | fe6b8f95d41c8a9530f6896688facaf4a6c71c25 (diff) | |
download | obnam-2c734daf6b4a252b2ba0adffa08fef7e1c23830e.tar.gz |
Use more generators to save memory
This might cost in terms of time.
-rw-r--r-- | obnamlib/fmt_ga/client.py | 20 |
1 files changed, 9 insertions, 11 deletions
```diff
diff --git a/obnamlib/fmt_ga/client.py b/obnamlib/fmt_ga/client.py
index 5623223e..370ca98d 100644
--- a/obnamlib/fmt_ga/client.py
+++ b/obnamlib/fmt_ga/client.py
@@ -230,7 +230,7 @@ class GAClient(object):
             })
         self._dumper = dumper
 
-        chunks_to_remove = self.get_generation_chunk_ids(gen_number)
+        chunks_to_remove = set(self._generate_generation_chunk_ids(gen_number))
         dumper.dump_memory_profile('after getting chunks in removed gen')
 
         for chunk_id in self._generate_chunk_ids_in_generations(remaining):
@@ -377,6 +377,9 @@ class GAClient(object):
                 filename=filename)
 
     def get_generation_chunk_ids(self, gen_number):
+        return list(self._generate_generation_chunk_ids(gen_number))
+
+    def _generate_generation_chunk_ids(self, gen_number):
         if hasattr(self, '_dumper'):
             dump = self._dumper.dump_memory_profile
         else:
@@ -393,26 +396,21 @@ class GAClient(object):
         metadata = generation.get_file_metadata()
         dump('after getting file metadata for generation')
 
-        union = set()
         n = 0
         for filename in metadata:
             n += 1
             if (n % 1000) == 0:
                 dump('gen has at least {} files'.format(n))
-            union = union.union(set(metadata.get_file_chunk_ids(filename)))
-        dump('after building union of sets')
+            chunk_ids = metadata.get_file_chunk_ids(filename)
+            for chunk_id in chunk_ids:
+                yield chunk_id
+        dump('after yielding all chunk ids')
         dump('gen has {} files'.format(n))
 
-        result = list(union)
-        dump('after constructing result')
-
-        return result
-
     def _generate_chunk_ids_in_generations(self, generations):
         for generation in generations:
             gen_number = generation.get_number()
-            chunk_ids = self.get_generation_chunk_ids(gen_number)
-            for chunk_id in chunk_ids:
+            for chunk_id in self._generate_generation_chunk_ids(gen_number):
                 yield chunk_id
 
     def get_file_children(self, gen_number, filename):
```