diff options
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 33 |
1 files changed, 20 insertions, 13 deletions
@@ -81,22 +81,29 @@ class DuplicateFileFinder(object):
         done_files = 0
         result = []
         for tuples in self.by_size.itervalues():
-            by_checksum = dict()
-            for dev, ino, pathname in tuples:
-                checksum = self.file_checksum(pathname)
-                if checksum not in by_checksum:
-                    by_checksum[checksum] = set()
-                by_checksum[checksum].add(pathname)
-            for names in by_checksum.itervalues():
-                if len(names) > 1:
-                    result.append(names)
-                done_files += len(names)
-                self.progress.write('%d/%d (%.1f%%) files done' %
-                                    (done_files, total_files,
-                                     100.0 * float(done_files) / total_files))
+            if len(tuples) > 1:
+                by_checksum = dict()
+                for dev, ino, pathname in tuples:
+                    checksum = self.file_checksum(pathname)
+                    if checksum not in by_checksum:
+                        by_checksum[checksum] = set()
+                    by_checksum[checksum].add(pathname)
+                    done_files += 1
+                    self.duplicates_progress(done_files, total_files)
+                for names in by_checksum.itervalues():
+                    if len(names) > 1:
+                        result.append(names)
+            else:
+                done_files += 1
+                self.duplicates_progress(done_files, total_files)
         self.progress.finished()
         return result
 
+    def duplicates_progress(self, done_files, total_files):
+        self.progress.write('%d/%d (%.1f%%) files done' %
+                            (done_files, total_files,
+                             100.0 * float(done_files) / total_files))
+
     def file_checksum(self, pathname):
         return hashlib.md5(file(pathname, 'rb').read()).digest()
 