From 2fc01f7cad3f0df92aa08b22072ee51f3b1b9db4 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 25 Apr 2010 06:15:25 +1200 Subject: Only report hardlinks as duplicates if there's another identical file as well. If foo and bar are hardlinks to the same inode, and foobar is a file with the same content, report all three. However, if there is no foobar to be reported, do not report anything. --- dupfiles | 3 +++ testit | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dupfiles b/dupfiles index ea52d7f..dd0364a 100755 --- a/dupfiles +++ b/dupfiles @@ -84,6 +84,9 @@ class DuplicateFileFinder(object): result = [] for size, tuples in self.by_size.iteritems(): + if len(set((dev, ino) for dev, ino, pathname in tuples)) == 1: + # All duplicates are hardlinks to the same inode. Skip. + continue by_checksum = dict() for dev, ino, pathname in tuples: checksum = self.file_checksum(pathname) diff --git a/testit b/testit index b1bdc9f..59f2659 100755 --- a/testit +++ b/testit @@ -92,7 +92,7 @@ class TwoHardlinksToSameContent(TestCase): def setUp(self): self.create('foo', 'foo') self.hardlink('foo', 'bar') - self.identical = ['foo', 'bar'] + self.identical = [] class TwoHardlinksToSameContentPlusSecondIdenticalCopy(TestCase): -- cgit v1.2.1