summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2010-04-29 19:55:29 +1200
committer Lars Wirzenius <liw@liw.fi> 2010-04-29 19:55:29 +1200
commit 4559000a11c1a17b6ddcf7d9617020c6b7dab9c9 (patch)
tree 0d7c295ede64ae02f88fd61d73f8ee327171ad6a
parent 6ee3695ddbdd592fa0f1ab26641abcebeb45eab1 (diff)
download dupfiles-4559000a11c1a17b6ddcf7d9617020c6b7dab9c9.tar.gz
Add progress reporting when finding duplicates.
-rwxr-xr-x dupfiles 29
1 file changed, 21 insertions, 8 deletions
diff --git a/dupfiles b/dupfiles
index 35742f1..1aa9dc0 100755
--- a/dupfiles
+++ b/dupfiles
@@ -80,8 +80,12 @@ class DuplicateFileFinder(object):
for size, tuples in self.by_size.iteritems():
if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
# All duplicates are hardlinks to the same inode. Skip.
- continue
- result += self.find_duplicates([p for d, i, p in tuples])
+ done_bytes += len(tuples) * size
+ else:
+ new_dups = self.find_duplicates([p for d, i, p in tuples])
+ result += new_dups
+ done_bytes += len(new_dups) * size
+ self.duplicates_progress(done_bytes, total_bytes, start_time)
self.progress.finished()
return result
@@ -146,14 +150,10 @@ class DuplicateFileFinder(object):
def duplicates_progress(self, done, total, started):
duration = time.time() - started
- if duration < 1:
- speed = 0
- else:
- speed = done / duration
- self.progress.write('%s/%s (%.1f%%) done (%s/s)' %
+ self.progress.write('%s/%s (%.1f%%) done (%s)' %
(self.human_size(done), self.human_size(total),
100.0 * float(done) / float(total),
- self.human_size(speed)))
+ self.human_duration(duration)))
def human_size(self, size):
tab = [(1024**3, 'GiB'),
@@ -164,6 +164,19 @@ class DuplicateFileFinder(object):
return '%.1f %s' % (float(size) / float(limit), unit)
return '0 B'
+ def human_duration(self, duration):
+ units = [(3600, 'h'),
+ (60, 'min'),
+ (1, 's')]
+
+ parts = []
+ for limit, unit in units:
+ count = int(duration) / limit
+ duration %= limit
+ if count > 0:
+ parts.append('%d %s' % (count, unit))
+ return ' '.join(parts or ['0 s'])
+
def make_hardlinks(duplicates):
canonical = duplicates.pop()