summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2017-02-12 18:44:01 +0200
committer: Lars Wirzenius <liw@liw.fi> 2017-02-12 18:44:01 +0200
commit: 2c734daf6b4a252b2ba0adffa08fef7e1c23830e (patch)
tree: 0c471e05e59e99f80f5faa045c2db61e52b819c2
parent: fe6b8f95d41c8a9530f6896688facaf4a6c71c25 (diff)
download: obnam-2c734daf6b4a252b2ba0adffa08fef7e1c23830e.tar.gz
Use more generators to save memory
This might cost in terms of time.
-rw-r--r-- obnamlib/fmt_ga/client.py 20
1 files changed, 9 insertions, 11 deletions
diff --git a/obnamlib/fmt_ga/client.py b/obnamlib/fmt_ga/client.py
index 5623223e..370ca98d 100644
--- a/obnamlib/fmt_ga/client.py
+++ b/obnamlib/fmt_ga/client.py
@@ -230,7 +230,7 @@ class GAClient(object):
})
self._dumper = dumper
- chunks_to_remove = self.get_generation_chunk_ids(gen_number)
+ chunks_to_remove = set(self._generate_generation_chunk_ids(gen_number))
dumper.dump_memory_profile('after getting chunks in removed gen')
for chunk_id in self._generate_chunk_ids_in_generations(remaining):
@@ -377,6 +377,9 @@ class GAClient(object):
filename=filename)
def get_generation_chunk_ids(self, gen_number):
+ return list(self._generate_generation_chunk_ids(gen_number))
+
+ def _generate_generation_chunk_ids(self, gen_number):
if hasattr(self, '_dumper'):
dump = self._dumper.dump_memory_profile
else:
@@ -393,26 +396,21 @@ class GAClient(object):
metadata = generation.get_file_metadata()
dump('after getting file metadata for generation')
- union = set()
n = 0
for filename in metadata:
n += 1
if (n % 1000) == 0:
dump('gen has at least {} files'.format(n))
- union = union.union(set(metadata.get_file_chunk_ids(filename)))
- dump('after building union of sets')
+ chunk_ids = metadata.get_file_chunk_ids(filename)
+ for chunk_id in chunk_ids:
+ yield chunk_id
+ dump('after yielding all chunk ids')
dump('gen has {} files'.format(n))
- result = list(union)
- dump('after constructing result')
-
- return result
-
def _generate_chunk_ids_in_generations(self, generations):
for generation in generations:
gen_number = generation.get_number()
- chunk_ids = self.get_generation_chunk_ids(gen_number)
- for chunk_id in chunk_ids:
+ for chunk_id in self._generate_generation_chunk_ids(gen_number):
yield chunk_id
def get_file_children(self, gen_number, filename):