summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2016-06-12 19:47:55 +0300
committer Lars Wirzenius <liw@liw.fi> 2016-06-12 19:47:55 +0300
commit 4acf5b0effb45f02f3283f4943c57e1768e03695 (patch)
tree 29427be09c1e3f984635ed815c3f74415b562a3f
parent 230cd70f9507b0fee25f1b73f2562293eda7f704 (diff)
download obnam-liw/meliae.tar.gz
Dedup only the biggest liw/meliae
-rw-r--r-- meliaereader/reader.py 28
1 files changed, 21 insertions, 7 deletions
diff --git a/meliaereader/reader.py b/meliaereader/reader.py
index 8432f3f2..1727ff3a 100644
--- a/meliaereader/reader.py
+++ b/meliaereader/reader.py
@@ -101,17 +101,31 @@ class MeliaeReader(object):
def compute_closures(self):
all_refs = self._objs.keys()
+ biggest = []
+ max_biggest = 100
for i, ref in enumerate(all_refs):
- sys.stderr.write('{} closures left\n'.format(
- len(self) - len(self._closures)))
+ if (i % 100) == 0:
+ remaining = len(self) - len(self._closures)
+ sys.stderr.write(
+ '{} of {} closures done, {} remaining\n'.format(
+ i, len(self), remaining))
+
closure = self._simple_get_closure(ref)
- for j in range(0, i):
- j_ref = all_refs[j]
- if self._closures[j_ref] == closure:
- closure = self._closures[j_ref]
- break
+ closure = self._dedup_biggest(biggest, max_biggest, closure)
self._closures[ref] = closure
+ def _dedup_biggest(self, biggest, max_biggest, closure):
+ if biggest and biggest[0][0] > len(closure):
+ return closure
+ for n, big in biggest:
+ if n == len(closure) and big == closure:
+ return big
+ biggest.append((len(closure), closure))
+ biggest.sort()
+ if len(biggest) > max_biggest:
+ del biggest[0] # pragma: no cover
+ return closure
+
def _simple_get_closure(self, ref): # pragma: no cover
closure = set()
todo = set([ref])