summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2010-04-18 14:21:56 +1200
committer Lars Wirzenius <liw@liw.fi> 2010-04-18 14:21:56 +1200
commit 7feaddceb111be033d0f773621c3bb8b13cf0fed (patch)
tree 87b9379c989a1bd2cff07a8b77a42b8212d30e45
parent 93dd590836516f560fcdc476f68670467d5db2bf (diff)
download dupfiles-7feaddceb111be033d0f773621c3bb8b13cf0fed.tar.gz
Improve progress reporting.
-rwxr-xr-x dupfiles 54
1 files changed, 32 insertions, 22 deletions
diff --git a/dupfiles b/dupfiles
index cc1ff47..93287f5 100755
--- a/dupfiles
+++ b/dupfiles
@@ -77,32 +77,42 @@ class DuplicateFileFinder(object):
# groups. If a group becomes just a single file, that file is not
# a duplicate and can be dropped completely.
def duplicates(self):
- total_files = sum(len(x) for x in self.by_size.itervalues())
- done_files = 0
+ skip = [size for size in self.by_size if len(self.by_size[size]) == 1]
+ for size in skip:
+ del self.by_size[size]
+
+ total_bytes = sum(len(tuples) * size
+ for size, tuples in self.by_size.iteritems())
+ done_bytes = 0
result = []
- for tuples in self.by_size.itervalues():
- if len(tuples) > 1:
- by_checksum = dict()
- for dev, ino, pathname in tuples:
- checksum = self.file_checksum(pathname)
- if checksum not in by_checksum:
- by_checksum[checksum] = set()
- by_checksum[checksum].add(pathname)
- done_files += 1
- self.duplicates_progress(done_files, total_files)
- for names in by_checksum.itervalues():
- if len(names) > 1:
- result.append(names)
- else:
- done_files += 1
- self.duplicates_progress(done_files, total_files)
+ for size, tuples in self.by_size.iteritems():
+ by_checksum = dict()
+ for dev, ino, pathname in tuples:
+ checksum = self.file_checksum(pathname)
+ if checksum not in by_checksum:
+ by_checksum[checksum] = set()
+ by_checksum[checksum].add(pathname)
+ done_bytes += size
+ self.duplicates_progress(done_bytes, total_bytes)
+ for names in by_checksum.itervalues():
+ if len(names) > 1:
+ result.append(names)
self.progress.finished()
return result
- def duplicates_progress(self, done_files, total_files):
- self.progress.write('%d/%d (%.1f%%) files done' %
- (done_files, total_files,
- 100.0 * float(done_files) / total_files))
+ def duplicates_progress(self, done, total):
+ self.progress.write('%s/%s (%.1f%%) bytes done' %
+ (self.human_size(done), self.human_size(total),
+ 100.0 * float(done) / float(total)))
+
+ def human_size(self, size):
+ tab = [(1024**3, 'GiB'),
+ (1024**2, 'MiB'),
+ (1024**1, 'KiB')]
+ for limit, unit in tab:
+ if size >= limit:
+ return '%.1f %s' % (float(size) / float(limit), unit)
+ return '0 B'
def file_checksum(self, pathname):
return hashlib.md5(file(pathname, 'rb').read()).digest()