author	Lars Wirzenius <liw@liw.fi>	2010-04-18 09:56:53 +1200
committer	Lars Wirzenius <liw@liw.fi>	2010-04-18 09:56:53 +1200
commit	25f74b1cd85ea624e6dd588b3ba19f4738d5c2d4 (patch)
tree	a00d7d05820319d521bb7f931b84ae908e2f0403
parent	d311a9aff8ed5857058045b0bee87536f38a1afc (diff)
download	dupfiles-25f74b1cd85ea624e6dd588b3ba19f4738d5c2d4.tar.gz
Bugfix: store dev/ino for each pathname, not just for the first file with a given size.
-rwxr-xr-x	dupfiles	19
1 file changed, 11 insertions(+), 8 deletions(-)
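
The fix changes the shape of by_size: previously each file size mapped to a single (dev, ino, pathnames) tuple, so only the first file seen at a given size had its dev/ino recorded, and hardlinks to any later same-size file could be misclassified; now each size maps to a list of (dev, ino, pathname) tuples, one per distinct inode. A minimal sketch of the patched collection logic, with the surrounding class and progress reporting elided (Python 3 spelling; the script itself is Python 2):

    import os

    def collect_sizes(pathnames):
        # Map file size -> list of (dev, ino, pathname) tuples, as in the
        # patched collect(); hardlinked copies (same device and inode) are
        # recorded only once.
        by_size = {}
        for pathname in pathnames:
            st = os.stat(pathname)
            entries = by_size.setdefault(st.st_size, [])
            if not any(dev == st.st_dev and ino == st.st_ino
                       for dev, ino, _ in entries):
                entries.append((st.st_dev, st.st_ino, pathname))
        return by_size
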
diff --git a/dupfiles b/dupfiles
index 5474be9..be36f0c 100755
--- a/dupfiles
+++ b/dupfiles
@@ -59,13 +59,15 @@ class DuplicateFileFinder(object):
pathnames = [os.path.join(dirname, f) for f in filenames]
for pathname in pathnames:
stat = os.stat(pathname)
+ t = (stat.st_dev, stat.st_ino, pathname)
if stat.st_size in self.by_size:
- dev, ino, pathnames = self.by_size[stat.st_size]
- if stat.st_dev != dev or stat.st_ino != ino:
- pathnames.add(pathname)
+ for dev, ino, pathname in self.by_size[stat.st_size]:
+ if stat.st_dev == dev and stat.st_ino == ino:
+ break
+ else:
+ self.by_size[stat.st_size].append(t)
else:
- self.by_size[stat.st_size] = (stat.st_dev, stat.st_ino,
- set([pathname]))
+ self.by_size[stat.st_size] = [t]
self.progress.finished()
# FIXME: This computed the checksum for each file, the full file.
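
The hunk above relies on Python's for/else: the else block runs only when the loop finishes without hitting break, i.e. when no already-recorded entry shares the new file's dev/ino. A standalone illustration of the idiom, with made-up device and inode numbers:

    entries = [(2049, 1234, 'a.txt')]          # (dev, ino, pathname)
    candidate = (2049, 5678, 'b.txt')

    for dev, ino, _ in entries:
        if (dev, ino) == candidate[:2]:
            break                  # hardlink to a file already tracked
    else:
        entries.append(candidate)  # no match: a genuinely new file
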
@@ -75,12 +77,12 @@ class DuplicateFileFinder(object):
# groups. If a group becomes just a single file, that file is not
# a duplicate and can be dropped completely.
def duplicates(self):
- total_files = sum(len(x[2]) for x in self.by_size.itervalues())
+ total_files = sum(len(x) for x in self.by_size.itervalues())
done_files = 0
result = []
- for dev, ino, pathnames in self.by_size.itervalues():
+ for tuples in self.by_size.itervalues():
by_checksum = dict()
- for pathname in pathnames:
+ for dev, ino, pathname in tuples:
checksum = self.file_checksum(pathname)
if checksum not in by_checksum:
by_checksum[checksum] = set()
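
file_checksum() is defined outside this hunk; per the FIXME above, it currently checksums the full contents of every file. A hypothetical sketch of such a method as a standalone function, assuming a digest computed in fixed-size chunks (the real implementation may differ):

    import hashlib

    def file_checksum(pathname, chunk_size=64 * 1024):
        # Hypothetical stand-in for the file_checksum() method used above;
        # hashes the whole file chunk by chunk (cf. the FIXME).
        h = hashlib.md5()
        with open(pathname, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                h.update(chunk)
        return h.hexdigest()
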
@@ -124,6 +126,7 @@ def main():
dupfinder = DuplicateFileFinder(progress)
for dirname in sorted(args):
dupfinder.collect(dirname)
+
for duplicates in dupfinder.duplicates():
if opts.make_hardlinks:
make_hardlinks(duplicates)
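
make_hardlinks() is likewise not shown in this diff. A hypothetical sketch, assuming each duplicates group is an iterable of pathnames with identical contents: keep one pathname as canonical and replace the rest with hardlinks to it.

    import os

    def make_hardlinks(duplicates):
        # Hypothetical sketch only; the real helper lives elsewhere in
        # dupfiles. Pick one pathname as canonical and replace the others
        # with hardlinks to it.
        pathnames = sorted(duplicates)   # groups may arrive as sets
        canonical = pathnames[0]
        for pathname in pathnames[1:]:
            os.remove(pathname)
            os.link(canonical, pathname)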