diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-10-04 21:24:31 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-10-04 21:24:31 +0100 |
commit | d218468034cb352cd17107f10f60ea31d7e00fd0 (patch) | |
tree | e62f6c2f39daef19854e93a489349872e4e91a70 /dupfiles | |
parent | 99954a66c3cd6c3555d8851e7b8d3ac527ef18c5 (diff) | |
download | dupfiles-d218468034cb352cd17107f10f60ea31d7e00fd0.tar.gz |
Replace progressbar usage with line-based output to stderr.
For big sets of files, the progress bar output was not useful.
It stayed at the initial 0% stage for very long, with no
indication of what is happening. The new code makes for less
pretty output, but it is more useful.
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 28 |
1 file changed, 12 insertions, 16 deletions
@@ -21,7 +21,6 @@ import errno
 import hashlib
 import optparse
 import os
-import progressbar
 import random
 import stat
 import sys
@@ -108,25 +107,27 @@ class DuplicateFileFinder(object):
             self.by_size[st.st_size] = [t]
 
     def duplicates(self):
+        if self.progress:
+            sys.stderr.write('Looking for groups of files of same size\n')
         skip = [size for size in self.by_size if len(self.by_size[size]) == 1]
         for size in skip:
             del self.by_size[size]
+        if self.progress:
+            sys.stderr.write('Ignored %d groups of one file each\n' % len(skip))
+            sys.stderr.write('There are %d groups of files of same size\n' %
+                             len(self.by_size))
 
         total_bytes = sum(len(tuples) * size
                           for size, tuples in self.by_size.iteritems())
-        if self.progress:
-            widgets = [
-                progressbar.FileTransferSpeed(), ' ',
-                progressbar.Percentage(), ' ',
-                progressbar.Bar(), ' ',
-                progressbar.ETA(),
-            ]
-            pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets)
-            pbar.start()
-
         result = []
         done_bytes = 0
+        ith_group = 0
         for size, tuples in sorted(self.by_size.iteritems()):
+            ith_group += 1
+            if self.progress:
+                sys.stderr.write('Group %d/%d (%d files of %d bytes)\n' %
+                                 (ith_group, len(self.by_size),
+                                  len(tuples), size))
             if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
                 # All duplicates are hardlinks to the same inode. Skip.
                 done_bytes += len(tuples) * size
@@ -135,11 +136,6 @@ class DuplicateFileFinder(object):
             result += new_dups
             done_bytes += len(tuples) * size
 
-            if self.progress:
-                pbar.update(done_bytes)
-
-        if self.progress:
-            pbar.finish()
         return result
 
     def find_duplicates(self, pathnames):