From 42bc05a46c3ecfcb8923317ee791c8cf5cab71fa Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Thu, 29 Apr 2010 21:18:06 +1200
Subject: Only investigate regular files, ignoring e.g. symlinks.

Also, sort filenames, it's nicer for people.
---
 dupfiles | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/dupfiles b/dupfiles
index 1aa9dc0..3822eaa 100755
--- a/dupfiles
+++ b/dupfiles
@@ -20,6 +20,7 @@
 import hashlib
 import optparse
 import os
+import stat
 import sys
 import time
 
@@ -56,14 +57,16 @@ class DuplicateFileFinder(object):
         for dirname, subdirs, filenames in os.walk(root):
             self.progress.write(dirname)
             subdirs.sort()
+            filenames.sort()
             pathnames = [os.path.join(dirname, f) for f in filenames]
             for pathname in pathnames:
-                stat = os.lstat(pathname)
-                t = (stat.st_dev, stat.st_ino, pathname)
-                if stat.st_size in self.by_size:
-                    self.by_size[stat.st_size].append(t)
-                else:
-                    self.by_size[stat.st_size] = [t]
+                st = os.lstat(pathname)
+                if stat.S_ISREG(st.st_mode):
+                    t = (st.st_dev, st.st_ino, pathname)
+                    if st.st_size in self.by_size:
+                        self.by_size[st.st_size].append(t)
+                    else:
+                        self.by_size[st.st_size] = [t]
         self.progress.finished()
 
     def duplicates(self):
-- 
cgit v1.2.1