summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2010-04-25 06:15:25 +1200
committer: Lars Wirzenius <liw@liw.fi> 2010-04-25 06:15:25 +1200
commit: 2fc01f7cad3f0df92aa08b22072ee51f3b1b9db4 (patch)
tree: be1f1d2c1080a4efbe735da050af5a2cd2d0b337
parent: d4533994918bacd3888882448781f7041ff2bf18 (diff)
download: dupfiles-2fc01f7cad3f0df92aa08b22072ee51f3b1b9db4.tar.gz
Only report hardlinks as duplicates if there's another identical file as well.
If foo and bar are hardlinks to the same inode, and foobar is a file with the same content, report all three. However, if there is no foobar to be reported, do not report anything.
-rwxr-xr-x dupfiles 3
-rwxr-xr-x testit 2
2 files changed, 4 insertions, 1 deletion
diff --git a/dupfiles b/dupfiles
index ea52d7f..dd0364a 100755
--- a/dupfiles
+++ b/dupfiles
@@ -84,6 +84,9 @@ class DuplicateFileFinder(object):
result = []
for size, tuples in self.by_size.iteritems():
+ if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1:
+ # All duplicates are hardlinks to the same inode. Skip.
+ continue
by_checksum = dict()
for dev, ino, pathname in tuples:
checksum = self.file_checksum(pathname)
diff --git a/testit b/testit
index b1bdc9f..59f2659 100755
--- a/testit
+++ b/testit
@@ -92,7 +92,7 @@ class TwoHardlinksToSameContent(TestCase):
def setUp(self):
self.create('foo', 'foo')
self.hardlink('foo', 'bar')
- self.identical = ['foo', 'bar']
+ self.identical = []
class TwoHardlinksToSameContentPlusSecondIdenticalCopy(TestCase):