diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-04-18 09:56:53 +1200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-04-18 09:56:53 +1200 |
commit | 25f74b1cd85ea624e6dd588b3ba19f4738d5c2d4 (patch) | |
tree | a00d7d05820319d521bb7f931b84ae908e2f0403 | |
parent | d311a9aff8ed5857058045b0bee87536f38a1afc (diff) | |
download | dupfiles-25f74b1cd85ea624e6dd588b3ba19f4738d5c2d4.tar.gz |
Bugfix: store dev/ino for each pathname, not just for first file with a given size.
-rwxr-xr-x | dupfiles | 19 |
1 files changed, 11 insertions, 8 deletions
@@ -59,13 +59,15 @@ class DuplicateFileFinder(object):
                 pathnames = [os.path.join(dirname, f) for f in filenames]
                 for pathname in pathnames:
                     stat = os.stat(pathname)
+                    t = (stat.st_dev, stat.st_ino, pathname)
                     if stat.st_size in self.by_size:
-                        dev, ino, pathnames = self.by_size[stat.st_size]
-                        if stat.st_dev != dev or stat.st_ino != ino:
-                            pathnames.add(pathname)
+                        for dev, ino, pathname in self.by_size[stat.st_size]:
+                            if stat.st_dev == dev and stat.st_ino == ino:
+                                break
+                        else:
+                            self.by_size[stat.st_size].append(t)
                     else:
-                        self.by_size[stat.st_size] = (stat.st_dev, stat.st_ino,
-                                                      set([pathname]))
+                        self.by_size[stat.st_size] = [t]
         self.progress.finished()

     # FIXME: This computed the checksum for each file, the full file.
@@ -75,12 +77,12 @@ class DuplicateFileFinder(object):
     # groups. If a group becomes just a single file, that file is not
     # a duplicate and can be dropped completely.
     def duplicates(self):
-        total_files = sum(len(x[2]) for x in self.by_size.itervalues())
+        total_files = sum(len(x) for x in self.by_size.itervalues())
         done_files = 0
         result = []
-        for dev, ino, pathnames in self.by_size.itervalues():
+        for tuples in self.by_size.itervalues():
             by_checksum = dict()
-            for pathname in pathnames:
+            for dev, ino, pathname in tuples:
                 checksum = self.file_checksum(pathname)
                 if checksum not in by_checksum:
                     by_checksum[checksum] = set()
@@ -124,6 +126,7 @@ def main():
     dupfinder = DuplicateFileFinder(progress)
     for dirname in sorted(args):
         dupfinder.collect(dirname)
+
     for duplicates in dupfinder.duplicates():
         if opts.make_hardlinks:
             make_hardlinks(duplicates)