From d218468034cb352cd17107f10f60ea31d7e00fd0 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 4 Oct 2010 21:24:31 +0100 Subject: Replace progressbar usage with line-based output to stderr. For big sets of files, the progress bar output was not useful. It stayed at the initial 0% stage for a very long time, with no indication of what was happening. The new code makes for less pretty output, but it is more useful. --- dupfiles | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/dupfiles b/dupfiles index b1de7fc..8acea67 100755 --- a/dupfiles +++ b/dupfiles @@ -21,7 +21,6 @@ import errno import hashlib import optparse import os -import progressbar import random import stat import sys @@ -108,25 +107,27 @@ class DuplicateFileFinder(object): self.by_size[st.st_size] = [t] def duplicates(self): + if self.progress: + sys.stderr.write('Looking for groups of files of same size\n') skip = [size for size in self.by_size if len(self.by_size[size]) == 1] for size in skip: del self.by_size[size] + if self.progress: + sys.stderr.write('Ignored %d groups of one file each\n' % len(skip)) + sys.stderr.write('There are %d groups of files of same size\n' % + len(self.by_size)) total_bytes = sum(len(tuples) * size for size, tuples in self.by_size.iteritems()) - if self.progress: - widgets = [ - progressbar.FileTransferSpeed(), ' ', - progressbar.Percentage(), ' ', - progressbar.Bar(), ' ', - progressbar.ETA(), - ] - pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets) - pbar.start() - result = [] done_bytes = 0 + ith_group = 0 for size, tuples in sorted(self.by_size.iteritems()): + ith_group += 1 + if self.progress: + sys.stderr.write('Group %d/%d (%d files of %d bytes)\n' % + (ith_group, len(self.by_size), + len(tuples), size)) if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1: # All duplicates are hardlinks to the same inode. Skip. 
done_bytes += len(tuples) * size @@ -135,11 +136,6 @@ class DuplicateFileFinder(object): result += new_dups done_bytes += len(tuples) * size - if self.progress: - pbar.update(done_bytes) - - if self.progress: - pbar.finish() return result def find_duplicates(self, pathnames): -- cgit v1.2.1