diff options
author | Lars Wirzenius <liw@liw.fi> | 2016-06-12 17:00:14 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2016-06-12 17:00:14 +0300 |
commit | dce6ebac72e97b38a416ba07eef1dd19d7e73cdc (patch) | |
tree | f00633720b8b0f8cd4516b6640134324c7f1c7e7 | |
parent | c856b972c05a9a8418a670135652c69b515f2dea (diff) | |
download | obnam-dce6ebac72e97b38a416ba07eef1dd19d7e73cdc.tar.gz |
Optimise closures
-rw-r--r-- | meliaereader/reader.py | 50 | ||||
-rw-r--r-- | meliaereader/reader_tests.py | 8 |
2 files changed, 44 insertions, 14 deletions
diff --git a/meliaereader/reader.py b/meliaereader/reader.py index e3605437..a6961cda 100644 --- a/meliaereader/reader.py +++ b/meliaereader/reader.py @@ -26,8 +26,8 @@ import obnamlib class MeliaeReader(object): def __init__(self): - self._objs = {} - self._closures = {} + self._objs = {} # ref to object + self._closures = {} # ref to list of refs def __iter__(self): return iter(self._objs.values()) @@ -43,6 +43,7 @@ class MeliaeReader(object): for line in f: obj = json.loads(line) self._objs[obj['address']] = obj + self.compute_closures() def get_total_size(self): return self.get_size(self._objs.values()) @@ -54,22 +55,43 @@ class MeliaeReader(object): return set(o['type'] for o in self) def get_obnam_types(self): # pragma: no cover - return set(o['type'] for o in self if hasattr(obnamlib, o['type'])) + return set( + o['type'] + for o in self + if hasattr(obnamlib, o['type']) + ) def get_objs_of_type(self, typename): return [o for o in self if o['type'] == typename] - def get_closure(self, obj): - sys.stderr.write('get_closure({})\n'.format(obj['address'])) + def compute_closures(self): + while True: + refs = self.find_trivial_closures() + if not refs: + break + for ref in refs: + assert ref not in self._closures + self._closures[ref] = self.get_trivial_closure(ref) + + def find_trivial_closures(self): + refs = [] + for ref in self._objs: + if ref not in self._closures: + obj = self.get_object(ref) + if all((child_ref in self._closures) for child_ref in obj['refs']): + refs.append(ref) + return refs + + def get_trivial_closure(self, ref): + obj = self.get_object(ref) + refs = set([ref]) + for child_ref in obj['refs']: + refs = refs.union(self._closures.get(child_ref, set())) + return refs + + def get_closure(self, obj, indent=0): ref = obj['address'] - if ref not in self._closures: - refs = self._closures[ref] = set() - refs.add(ref) - for child_ref in obj['refs']: - if child_ref in self: - child = self.get_object(child_ref) - for child_obj in self.get_closure(child): - refs.add(child_obj['address']) + assert ref in self._closures return [self.get_object(r) for r in self._closures[ref]] def get_object(self, ref): @@ -81,6 +103,6 @@ class MeliaeReader(object): sys.stderr.write('get_closure_of_type({})\n'.format(typename)) type_closure = {} for obj in self.get_objs_of_type(typename): - for o in self.get_closure(obj): + for o in self.get_closure(obj, indent=1): type_closure[o['address']] = o return type_closure.values() diff --git a/meliaereader/reader_tests.py b/meliaereader/reader_tests.py index 4ef80483..2c1134ba 100644 --- a/meliaereader/reader_tests.py +++ b/meliaereader/reader_tests.py @@ -42,6 +42,14 @@ class MeliaeReaderTests(unittest.TestCase): return filename def make_object(self, **kwargs): + defaults = { + 'size': 0, + 'refs': [], + 'type': 'unknown', + } + for key, value in defaults.items(): + if key not in kwargs: + kwargs[key] = value return kwargs def test_has_no_objects_initially(self): |