summaryrefslogtreecommitdiff
path: root/dupfiles
diff options
context:
space:
mode:
Diffstat (limited to 'dupfiles')
-rwxr-xr-xdupfiles33
1 files changed, 20 insertions, 13 deletions
diff --git a/dupfiles b/dupfiles
index be36f0c..cc1ff47 100755
--- a/dupfiles
+++ b/dupfiles
@@ -81,22 +81,29 @@ class DuplicateFileFinder(object):
done_files = 0
result = []
for tuples in self.by_size.itervalues():
- by_checksum = dict()
- for dev, ino, pathname in tuples:
- checksum = self.file_checksum(pathname)
- if checksum not in by_checksum:
- by_checksum[checksum] = set()
- by_checksum[checksum].add(pathname)
- for names in by_checksum.itervalues():
- if len(names) > 1:
- result.append(names)
- done_files += len(names)
- self.progress.write('%d/%d (%.1f%%) files done' %
- (done_files, total_files,
- 100.0 * float(done_files) / total_files))
+ if len(tuples) > 1:
+ by_checksum = dict()
+ for dev, ino, pathname in tuples:
+ checksum = self.file_checksum(pathname)
+ if checksum not in by_checksum:
+ by_checksum[checksum] = set()
+ by_checksum[checksum].add(pathname)
+ done_files += 1
+ self.duplicates_progress(done_files, total_files)
+ for names in by_checksum.itervalues():
+ if len(names) > 1:
+ result.append(names)
+ else:
+ done_files += 1
+ self.duplicates_progress(done_files, total_files)
self.progress.finished()
return result
+ def duplicates_progress(self, done_files, total_files):
+ self.progress.write('%d/%d (%.1f%%) files done' %
+ (done_files, total_files,
+ 100.0 * float(done_files) / total_files))
+
def file_checksum(self, pathname):
return hashlib.md5(file(pathname, 'rb').read()).digest()