diff options
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 28 |
1 file changed, 12 insertions, 16 deletions
@@ -21,7 +21,6 @@ import errno import hashlib import optparse import os -import progressbar import random import stat import sys @@ -108,25 +107,27 @@ class DuplicateFileFinder(object): self.by_size[st.st_size] = [t] def duplicates(self): + if self.progress: + sys.stderr.write('Looking for groups of files of same size\n') skip = [size for size in self.by_size if len(self.by_size[size]) == 1] for size in skip: del self.by_size[size] + if self.progress: + sys.stderr.write('Ignored %d groups of one file each\n' % len(skip)) + sys.stderr.write('There are %d groups of files of same size\n' % + len(self.by_size)) total_bytes = sum(len(tuples) * size for size, tuples in self.by_size.iteritems()) - if self.progress: - widgets = [ - progressbar.FileTransferSpeed(), ' ', - progressbar.Percentage(), ' ', - progressbar.Bar(), ' ', - progressbar.ETA(), - ] - pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets) - pbar.start() - result = [] done_bytes = 0 + ith_group = 0 for size, tuples in sorted(self.by_size.iteritems()): + ith_group += 1 + if self.progress: + sys.stderr.write('Group %d/%d (%d files of %d bytes)\n' % + (ith_group, len(self.by_size), + len(tuples), size)) if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1: # All duplicates are hardlinks to the same inode. Skip. done_bytes += len(tuples) * size @@ -135,11 +136,6 @@ class DuplicateFileFinder(object): result += new_dups done_bytes += len(tuples) * size - if self.progress: - pbar.update(done_bytes) - - if self.progress: - pbar.finish() return result def find_duplicates(self, pathnames): |