diff options
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 83 |
1 files changed, 44 insertions, 39 deletions
@@ -21,23 +21,11 @@ import errno import hashlib import optparse import os -import progressbar import random import stat import sys import time - - -class ProgressBarValue(progressbar.ProgressBarWidget): - - def update(self, pbar): - return '%s' % pbar.currval - - -class ProgressBarMaxval(progressbar.ProgressBarWidget): - - def update(self, pbar): - return '%s' % pbar.maxval +import ttystatus class FileStats(object): @@ -92,10 +80,13 @@ class DuplicateFileFinder(object): self.progress = progress def collect(self, root): + ts = ttystatus.TerminalStatus() if self.progress: - sys.stderr.write('Scanning %s\n' % root) + ts.add(ttystatus.Literal('Scanning ')) + ts.add(ttystatus.Pathname('dirname')) for dirname, subdirs, filenames in os.walk(root): + ts['dirname'] = dirname subdirs.sort() filenames.sort() pathnames = [os.path.join(dirname, f) for f in filenames] @@ -107,40 +98,44 @@ class DuplicateFileFinder(object): self.by_size[st.st_size].append(t) else: self.by_size[st.st_size] = [t] + ts.clear() def duplicates(self): - skip = [size for size in self.by_size if len(self.by_size[size]) == 1] - for size in skip: - del self.by_size[size] - total_bytes = sum(len(tuples) * size - for size, tuples in self.by_size.iteritems()) + for size, tuples in self.by_size.iteritems()) + + ts = ttystatus.TerminalStatus(period=0.5) + ts['done'] = 0 if self.progress: - widgets = [ - progressbar.FileTransferSpeed(), ' ', - progressbar.Percentage(), ' ', - progressbar.Bar(), ' ', - progressbar.ETA(), - ] - pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets) - pbar.start() + ts.add(ttystatus.Literal('Comparing: ')) + ts.add(ttystatus.ByteSize('done')) + ts.add(ttystatus.Literal('/')) + ts.add(ttystatus.ByteSize('total')) + ts.add(ttystatus.Literal(' (')) + ts.add(ttystatus.PercentDone('done', 'total')) + ts.add(ttystatus.Literal('), group ')) + ts.add(ttystatus.Counter('size')) + ts.add(ttystatus.Literal('/')) + ts.add(ttystatus.Literal(str(len(self.by_size)))) + ts.add(ttystatus.Literal(' (')) + ts.add(ttystatus.ByteSize('size')) + ts.add(ttystatus.Literal(')')) result = [] - done_bytes = 0 + ith = 0 for size, tuples in sorted(self.by_size.iteritems()): - if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1: - # All duplicates are hardlinks to the same inode. Skip. - done_bytes += len(tuples) * size - else: - new_dups = self.find_duplicates([p for d, i, p in tuples]) - result += new_dups - done_bytes += len(tuples) * size - - if self.progress: - pbar.update(done_bytes) + ith += 1 + if len(set((dev, ino) for dev, ino, pathname in tuples)) > 1: + # All files are not hardlinks to the same inode. + # (This also excludes groups with just one file.) + result += self.find_duplicates([p for d, i, p in tuples]) + ts['size'] = size + ts['done'] += len(tuples) * size + ts['total'] = total_bytes if self.progress: - pbar.finish() + ts.finish() + return result def find_duplicates(self, pathnames): @@ -203,6 +198,12 @@ def make_hardlinks(duplicates): os.link(canonical, pathname) +def remove_all_but_one(duplicates): + keep = duplicates.pop() + for pathname in duplicates: + os.remove(pathname) + + def report(duplicates): sys.stdout.write('\n'.join(duplicates)) sys.stdout.write('\n\n') @@ -214,6 +215,8 @@ def main(): help='hardlink duplicate files to each other') parser.add_option('--progress', action='store_true', help='report progress') + parser.add_option('--remove', action='store_true', + help='remove all but one copy of identical files') opts, args = parser.parse_args() @@ -225,6 +228,8 @@ def main(): for duplicates in dupfinder.duplicates(): if opts.make_hardlinks: make_hardlinks(duplicates) + elif opts.remove: + remove_all_but_one(duplicates) else: report(duplicates) |