author     Lars Wirzenius <liw@liw.fi>  2010-10-04 21:24:31 +0100
committer  Lars Wirzenius <liw@liw.fi>  2010-10-04 21:24:31 +0100
commit     d218468034cb352cd17107f10f60ea31d7e00fd0 (patch)
tree       e62f6c2f39daef19854e93a489349872e4e91a70
parent     99954a66c3cd6c3555d8851e7b8d3ac527ef18c5 (diff)
download   dupfiles-d218468034cb352cd17107f10f60ea31d7e00fd0.tar.gz
Replace progressbar usage with line-based output to stderr.
For big sets of files, the progress bar output was not useful. It stayed at the initial 0% stage for a very long time, with no indication of what was happening. The new code produces less pretty output, but it is more useful.
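The replacement amounts to printing one plain status line to stderr per group of same-sized files, instead of updating a terminal progress bar. Below is a minimal, self-contained sketch of that pattern; the names report, process_groups and groups are illustrative assumptions, not taken from the dupfiles script itself.

import sys

def report(progress, msg):
    # Print one status line to stderr when progress reporting is enabled.
    if progress:
        sys.stderr.write(msg + '\n')

def process_groups(groups, progress=True):
    # 'groups' maps a file size to the list of paths sharing that size.
    # All names here are hypothetical; dupfiles keeps the equivalent state
    # in DuplicateFileFinder.by_size.
    report(progress, 'There are %d groups of files of same size' % len(groups))
    for i, (size, paths) in enumerate(sorted(groups.items()), 1):
        report(progress, 'Group %d/%d (%d files of %d bytes)' %
               (i, len(groups), len(paths), size))
        # ... hash and compare the files in this group ...

if __name__ == '__main__':
    process_groups({1024: ['a', 'b'], 2048: ['c', 'd', 'e']})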
-rwxr-xr-x  dupfiles  28
1 file changed, 12 insertions, 16 deletions
diff --git a/dupfiles b/dupfiles
index b1de7fc..8acea67 100755
--- a/dupfiles
+++ b/dupfiles
@@ -21,7 +21,6 @@ import errno
 import hashlib
 import optparse
 import os
-import progressbar
 import random
 import stat
 import sys
@@ -108,25 +107,27 @@ class DuplicateFileFinder(object):
                     self.by_size[st.st_size] = [t]
 
     def duplicates(self):
+        if self.progress:
+            sys.stderr.write('Looking for groups of files of same size\n')
         skip = [size for size in self.by_size if len(self.by_size[size]) == 1]
         for size in skip:
             del self.by_size[size]
+        if self.progress:
+            sys.stderr.write('Ignored %d groups of one file each\n' % len(skip))
+            sys.stderr.write('There are %d groups of files of same size\n' %
+                             len(self.by_size))
         total_bytes = sum(len(tuples) * size
                           for size, tuples in self.by_size.iteritems())
-        if self.progress:
-            widgets = [
-                progressbar.FileTransferSpeed(), ' ',
-                progressbar.Percentage(), ' ',
-                progressbar.Bar(), ' ',
-                progressbar.ETA(),
-            ]
-            pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets)
-            pbar.start()
-
         result = []
         done_bytes = 0
+        ith_group = 0
         for size, tuples in sorted(self.by_size.iteritems()):
+            ith_group += 1
+            if self.progress:
+                sys.stderr.write('Group %d/%d (%d files of %d bytes)\n' %
+                                 (ith_group, len(self.by_size),
+                                  len(tuples), size))
             if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
                 # All duplicates are hardlinks to the same inode. Skip.
                 done_bytes += len(tuples) * size
@@ -135,11 +136,6 @@ class DuplicateFileFinder(object):
             result += new_dups
             done_bytes += len(tuples) * size
-            if self.progress:
-                pbar.update(done_bytes)
-
-        if self.progress:
-            pbar.finish()
         return result
 
     def find_duplicates(self, pathnames):