Diffstat (limited to 'dupfiles')
-rwxr-xr-x  dupfiles  83
1 file changed, 44 insertions(+), 39 deletions(-)
diff --git a/dupfiles b/dupfiles
index 7c8af79..78e4b5f 100755
--- a/dupfiles
+++ b/dupfiles
@@ -21,23 +21,11 @@ import errno
import hashlib
import optparse
import os
-import progressbar
import random
import stat
import sys
import time
-
-
-class ProgressBarValue(progressbar.ProgressBarWidget):
-
- def update(self, pbar):
- return '%s' % pbar.currval
-
-
-class ProgressBarMaxval(progressbar.ProgressBarWidget):
-
- def update(self, pbar):
- return '%s' % pbar.maxval
+import ttystatus
class FileStats(object):
@@ -92,10 +80,13 @@ class DuplicateFileFinder(object):
self.progress = progress
def collect(self, root):
+ ts = ttystatus.TerminalStatus()
if self.progress:
- sys.stderr.write('Scanning %s\n' % root)
+ ts.add(ttystatus.Literal('Scanning '))
+ ts.add(ttystatus.Pathname('dirname'))
for dirname, subdirs, filenames in os.walk(root):
+ ts['dirname'] = dirname
subdirs.sort()
filenames.sort()
pathnames = [os.path.join(dirname, f) for f in filenames]
@@ -107,40 +98,44 @@ class DuplicateFileFinder(object):
self.by_size[st.st_size].append(t)
else:
self.by_size[st.st_size] = [t]
+ ts.clear()
def duplicates(self):
- skip = [size for size in self.by_size if len(self.by_size[size]) == 1]
- for size in skip:
- del self.by_size[size]
-
total_bytes = sum(len(tuples) * size
- for size, tuples in self.by_size.iteritems())
+ for size, tuples in self.by_size.iteritems())
+
+ ts = ttystatus.TerminalStatus(period=0.5)
+ ts['done'] = 0
if self.progress:
- widgets = [
- progressbar.FileTransferSpeed(), ' ',
- progressbar.Percentage(), ' ',
- progressbar.Bar(), ' ',
- progressbar.ETA(),
- ]
- pbar = progressbar.ProgressBar(maxval=total_bytes, widgets=widgets)
- pbar.start()
+ ts.add(ttystatus.Literal('Comparing: '))
+ ts.add(ttystatus.ByteSize('done'))
+ ts.add(ttystatus.Literal('/'))
+ ts.add(ttystatus.ByteSize('total'))
+ ts.add(ttystatus.Literal(' ('))
+ ts.add(ttystatus.PercentDone('done', 'total'))
+ ts.add(ttystatus.Literal('), group '))
+ ts.add(ttystatus.Counter('size'))
+ ts.add(ttystatus.Literal('/'))
+ ts.add(ttystatus.Literal(str(len(self.by_size))))
+ ts.add(ttystatus.Literal(' ('))
+ ts.add(ttystatus.ByteSize('size'))
+ ts.add(ttystatus.Literal(')'))
result = []
- done_bytes = 0
+ ith = 0
for size, tuples in sorted(self.by_size.iteritems()):
- if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
- # All duplicates are hardlinks to the same inode. Skip.
- done_bytes += len(tuples) * size
- else:
- new_dups = self.find_duplicates([p for d, i, p in tuples])
- result += new_dups
- done_bytes += len(tuples) * size
-
- if self.progress:
- pbar.update(done_bytes)
+ ith += 1
+ if len(set((dev, ino) for dev, ino, pathname in tuples)) > 1:
+ # Not all of the files are hardlinks to the same inode.
+ # (This also excludes groups with just one file.)
+ result += self.find_duplicates([p for d, i, p in tuples])
+ ts['size'] = size
+ ts['done'] += len(tuples) * size
+ ts['total'] = total_bytes
if self.progress:
- pbar.finish()
+ ts.finish()
+
return result
def find_duplicates(self, pathnames):
@@ -203,6 +198,12 @@ def make_hardlinks(duplicates):
os.link(canonical, pathname)
+def remove_all_but_one(duplicates):
+ keep = duplicates.pop()
+ for pathname in duplicates:
+ os.remove(pathname)
+
+
def report(duplicates):
sys.stdout.write('\n'.join(duplicates))
sys.stdout.write('\n\n')
@@ -214,6 +215,8 @@ def main():
help='hardlink duplicate files to each other')
parser.add_option('--progress', action='store_true',
help='report progress')
+ parser.add_option('--remove', action='store_true',
+ help='remove all but one copy of identical files')
opts, args = parser.parse_args()
@@ -225,6 +228,8 @@ def main():
for duplicates in dupfinder.duplicates():
if opts.make_hardlinks:
make_hardlinks(duplicates)
+ elif opts.remove:
+ remove_all_but_one(duplicates)
else:
report(duplicates)
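
For reference, a minimal standalone sketch of the ttystatus pattern this commit
switches to (illustrative only, not part of the commit; it assumes the
python-ttystatus package is installed and reuses the widgets and status keys
seen in the diff above):

    import ttystatus

    ts = ttystatus.TerminalStatus(period=0.5)  # redraw at most every 0.5 seconds
    ts.add(ttystatus.Literal('Comparing: '))
    ts.add(ttystatus.ByteSize('done'))         # each widget renders a named key
    ts.add(ttystatus.Literal('/'))
    ts.add(ttystatus.ByteSize('total'))
    ts.add(ttystatus.Literal(' ('))
    ts.add(ttystatus.PercentDone('done', 'total'))
    ts.add(ttystatus.Literal(')'))

    ts['total'] = 40 * 1024 * 1024
    ts['done'] = 0
    for _ in range(40):
        ts['done'] += 1024 * 1024  # assigning to a key redraws the status line
    ts.finish()                    # render the final state and end the line

This key-based design is what lets the diff report progress with plain
assignments such as ts['done'] += len(tuples) * size instead of the old
pbar.update() calls, and ts.clear() wipes the line once scanning finishes.
A hypothetical invocation of the new option (the path is made up):

    ./dupfiles --progress --remove ~/photos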