From 4559000a11c1a17b6ddcf7d9617020c6b7dab9c9 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Thu, 29 Apr 2010 19:55:29 +1200
Subject: Add progress reporting when finding duplicates.

---
 dupfiles | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/dupfiles b/dupfiles
index 35742f1..1aa9dc0 100755
--- a/dupfiles
+++ b/dupfiles
@@ -80,8 +80,12 @@ class DuplicateFileFinder(object):
         for size, tuples in self.by_size.iteritems():
             if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
                 # All duplicates are hardlinks to the same inode. Skip.
-                continue
-            result += self.find_duplicates([p for d, i, p in tuples])
+                done_bytes += len(tuples) * size
+            else:
+                new_dups = self.find_duplicates([p for d, i, p in tuples])
+                result += new_dups
+                done_bytes += len(new_dups) * size
+            self.duplicates_progress(done_bytes, total_bytes, start_time)
         self.progress.finished()
         return result
 
@@ -146,14 +150,10 @@ class DuplicateFileFinder(object):
 
     def duplicates_progress(self, done, total, started):
         duration = time.time() - started
-        if duration < 1:
-            speed = 0
-        else:
-            speed = done / duration
-        self.progress.write('%s/%s (%.1f%%) done (%s/s)' %
+        self.progress.write('%s/%s (%.1f%%) done (%s)' %
                             (self.human_size(done), self.human_size(total),
                              100.0 * float(done) / float(total),
-                             self.human_size(speed)))
+                             self.human_duration(duration)))
 
     def human_size(self, size):
         tab = [(1024**3, 'GiB'),
@@ -164,6 +164,19 @@ class DuplicateFileFinder(object):
                 return '%.1f %s' % (float(size) / float(limit), unit)
         return '0 B'
 
+    def human_duration(self, duration):
+        units = [(3600, 'h'),
+                 (60, 'min'),
+                 (1, 's')]
+
+        parts = []
+        for limit, unit in units:
+            count = int(duration) / limit
+            duration %= limit
+            if count > 0:
+                parts.append('%d %s' % (count, unit))
+        return ' '.join(parts or ['0 s'])
+
 
 def make_hardlinks(duplicates):
     canonical = duplicates.pop()
-- 
cgit v1.2.1