From d62f04fd150ac13e122b3e0a47a844f50b9f9349 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 9 Oct 2010 21:00:34 +0100 Subject: Convert progress reporting to happen using ttystatus. --- dupfiles | 61 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/dupfiles b/dupfiles index d78b427..8c06c3d 100755 --- a/dupfiles +++ b/dupfiles @@ -25,6 +25,7 @@ import random import stat import sys import time +import ttystatus class FileStats(object): @@ -78,10 +79,13 @@ class DuplicateFileFinder(object): self.progress = progress def collect(self, root): + ts = ttystatus.TerminalStatus() if self.progress: - sys.stderr.write('Scanning %s\n' % root) + ts.add(ttystatus.Literal('Scanning ')) + ts.add(ttystatus.Pathname('dirname')) for dirname, subdirs, filenames in os.walk(root): + ts['dirname'] = dirname subdirs.sort() filenames.sort() pathnames = [os.path.join(dirname, f) for f in filenames] @@ -93,36 +97,43 @@ class DuplicateFileFinder(object): self.by_size[st.st_size].append(t) else: self.by_size[st.st_size] = [t] + ts.clear() def duplicates(self): + total_bytes = sum(len(tuples) * size + for size, tuples in self.by_size.iteritems()) + + ts = ttystatus.TerminalStatus(period=0.5) + ts['done'] = 0 if self.progress: - sys.stderr.write('Looking for groups of files of same size\n') - skip = [size for size in self.by_size if len(self.by_size[size]) == 1] - for size in skip: - del self.by_size[size] - if self.progress: - sys.stderr.write('Ignored %d groups of one file each\n' % len(skip)) - sys.stderr.write('There are %d groups of files of same size\n' % - len(self.by_size)) + ts.add(ttystatus.Literal('Comparing: ')) + ts.add(ttystatus.ByteSize('done')) + ts.add(ttystatus.Literal('/')) + ts.add(ttystatus.ByteSize('total')) + ts.add(ttystatus.Literal(' (')) + ts.add(ttystatus.PercentDone('done', 'total')) + ts.add(ttystatus.Literal('), group ')) + ts.add(ttystatus.Counter('size')) + ts.add(ttystatus.Literal('/')) + ts.add(ttystatus.Literal(str(len(self.by_size)))) + ts.add(ttystatus.Literal(' (')) + ts.add(ttystatus.ByteSize('size')) + ts.add(ttystatus.Literal(')')) - total_bytes = sum(len(tuples) * size - for size, tuples in self.by_size.iteritems()) result = [] - done_bytes = 0 - ith_group = 0 + ith = 0 for size, tuples in sorted(self.by_size.iteritems()): - ith_group += 1 - if self.progress: - sys.stderr.write('Group %d/%d (%d files of %d bytes)\n' % - (ith_group, len(self.by_size), - len(tuples), size)) - if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1: - # All duplicates are hardlinks to the same inode. Skip. - done_bytes += len(tuples) * size - else: - new_dups = self.find_duplicates([p for d, i, p in tuples]) - result += new_dups - done_bytes += len(tuples) * size + ith += 1 + if len(set((dev, ino) for dev, ino, pathname in tuples)) > 1: + # All files are not hardlinks to the same inode. + # (This also excludes groups with just one file.) + result += self.find_duplicates([p for d, i, p in tuples]) + ts['size'] = size + ts['done'] += len(tuples) * size + ts['total'] = total_bytes + + if self.progress: + ts.finish() return result -- cgit v1.2.1