diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-04-29 21:18:06 +1200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-04-29 21:18:06 +1200 |
commit | 42bc05a46c3ecfcb8923317ee791c8cf5cab71fa (patch) | |
tree | 39874521731402f23a21345a93002173f0d6bb2d /dupfiles | |
parent | ed43a5cc6d5181d5dff523b5031b76939df7bbd5 (diff) | |
download | dupfiles-42bc05a46c3ecfcb8923317ee791c8cf5cab71fa.tar.gz |
Only investigate regular files, ignoring e.g. symlinks.
Also, sort filenames; it's nicer for people.
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 15 |
1 file changed, 9 insertions, 6 deletions
@@ -20,6 +20,7 @@ import hashlib
 import optparse
 import os
+import stat
 import sys
 import time
 
 
@@ -56,14 +57,16 @@ class DuplicateFileFinder(object):
         for dirname, subdirs, filenames in os.walk(root):
             self.progress.write(dirname)
             subdirs.sort()
+            filenames.sort()
             pathnames = [os.path.join(dirname, f) for f in filenames]
             for pathname in pathnames:
-                stat = os.lstat(pathname)
-                t = (stat.st_dev, stat.st_ino, pathname)
-                if stat.st_size in self.by_size:
-                    self.by_size[stat.st_size].append(t)
-                else:
-                    self.by_size[stat.st_size] = [t]
+                st = os.lstat(pathname)
+                if stat.S_ISREG(st.st_mode):
+                    t = (st.st_dev, st.st_ino, pathname)
+                    if st.st_size in self.by_size:
+                        self.by_size[st.st_size].append(t)
+                    else:
+                        self.by_size[st.st_size] = [t]
         self.progress.finished()
 
     def duplicates(self):