summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2010-04-29 21:18:06 +1200
committer Lars Wirzenius <liw@liw.fi> 2010-04-29 21:18:06 +1200
commit 42bc05a46c3ecfcb8923317ee791c8cf5cab71fa (patch)
tree 39874521731402f23a21345a93002173f0d6bb2d
parent ed43a5cc6d5181d5dff523b5031b76939df7bbd5 (diff)
download dupfiles-42bc05a46c3ecfcb8923317ee791c8cf5cab71fa.tar.gz
Only investigate regular files, ignoring e.g. symlinks.
Also, sort filenames, it's nicer for people.
-rwxr-xr-x dupfiles 15
1 files changed, 9 insertions, 6 deletions
diff --git a/dupfiles b/dupfiles
index 1aa9dc0..3822eaa 100755
--- a/dupfiles
+++ b/dupfiles
@@ -20,6 +20,7 @@
import hashlib
import optparse
import os
+import stat
import sys
import time
@@ -56,14 +57,16 @@ class DuplicateFileFinder(object):
for dirname, subdirs, filenames in os.walk(root):
self.progress.write(dirname)
subdirs.sort()
+ filenames.sort()
pathnames = [os.path.join(dirname, f) for f in filenames]
for pathname in pathnames:
- stat = os.lstat(pathname)
- t = (stat.st_dev, stat.st_ino, pathname)
- if stat.st_size in self.by_size:
- self.by_size[stat.st_size].append(t)
- else:
- self.by_size[stat.st_size] = [t]
+ st = os.lstat(pathname)
+ if stat.S_ISREG(st.st_mode):
+ t = (st.st_dev, st.st_ino, pathname)
+ if st.st_size in self.by_size:
+ self.by_size[st.st_size].append(t)
+ else:
+ self.by_size[st.st_size] = [t]
self.progress.finished()
def duplicates(self):