From dc39bf76e7255d4a022216cc96c7bc0a9b1679fc Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 25 Apr 2010 06:09:23 +1200 Subject: Do not follow symlinks when statting. Report all hardlinks to the same file as duplicates. This is probably stupid, but avoids a bug: if foo and bar are hardlinks to the same inode, and foobar is not, but has identical content, then previously it would be random whether foo or bar was reported as the duplicate of foobar. Further, only one of foo and bar would be made into a hardlink with foobar. So the next run would report the other one as a duplicate. --- dupfiles | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'dupfiles') diff --git a/dupfiles b/dupfiles index 14a5b2d..ea52d7f 100755 --- a/dupfiles +++ b/dupfiles @@ -58,14 +58,10 @@ class DuplicateFileFinder(object): subdirs.sort() pathnames = [os.path.join(dirname, f) for f in filenames] for pathname in pathnames: - stat = os.stat(pathname) + stat = os.lstat(pathname) t = (stat.st_dev, stat.st_ino, pathname) if stat.st_size in self.by_size: - for dev, ino, pathname in self.by_size[stat.st_size]: - if stat.st_dev == dev and stat.st_ino == ino: - break - else: - self.by_size[stat.st_size].append(t) + self.by_size[stat.st_size].append(t) else: self.by_size[stat.st_size] = [t] self.progress.finished() -- cgit v1.2.1